-
Notifications
You must be signed in to change notification settings - Fork 232
Personalized page rank #502
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
198616d
54a9491
1d06b64
a11ba98
da8bdcc
4e358c4
12ddd8f
f2eb07c
91ed5d5
79f4c84
71124e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,236 @@ | ||
| // Copyright 2026 Emmanouil Krasanakis | ||
|
|
||
| // Distributed under the Boost Software License, Version 1.0. | ||
| // (See accompanying file LICENSE_1_0.txt or copy at | ||
| // http://www.boost.org/LICENSE_1_0.txt) | ||
|
|
||
| // Authors: Emmanouil Krasanakis | ||
|
|
||
| #ifndef BOOST_GRAPH_PERSONALIZED_PAGE_RANK_HPP | ||
| #define BOOST_GRAPH_PERSONALIZED_PAGE_RANK_HPP | ||
|
|
||
| #include <boost/property_map/property_map.hpp> | ||
| #include <boost/graph/graph_traits.hpp> | ||
| #include <boost/graph/properties.hpp> | ||
| #include <boost/graph/iteration_macros.hpp> | ||
| #include <boost/graph/overloading.hpp> | ||
| #include <boost/graph/detail/mpi_include.hpp> | ||
| #include <boost/property_map/function_property_map.hpp> | ||
| #include <vector> | ||
|
|
||
| namespace boost | ||
| { | ||
| namespace graph | ||
| { | ||
| struct rank_convergence | ||
| { | ||
| explicit rank_convergence(std::size_t iters, double tol=0) : iters(iters), tol(tol) {} // allowing tolerance for early stopping | ||
| template < typename RankMap, typename RankMap2, typename Graph > | ||
| bool operator()(const RankMap& current, const RankMap2& previous, const Graph& g) | ||
| { | ||
| if (--iters == 0) | ||
| return true; | ||
| if (!tol) | ||
| return false; | ||
| using rank_type = typename property_traits< RankMap >::value_type; | ||
| rank_type sum_abs(0); | ||
| for (auto v : boost::make_iterator_range(vertices(g))) | ||
| sum_abs += std::abs(get(current, v) - get(previous, v)); | ||
| return sum_abs*num_vertices(g)<tol; | ||
| } | ||
| std::size_t iters; | ||
| double tol; | ||
| }; | ||
|
|
||
| namespace personalized_page_rank_detail | ||
| { | ||
| template < | ||
| typename Graph, | ||
| typename WeightMap, | ||
| typename PersonalizationMap, | ||
| typename RankMap, | ||
| typename RankMap2 > | ||
| void personalized_page_rank_step( | ||
| const Graph& g, | ||
| WeightMap weight_map, | ||
| PersonalizationMap personalization_map, | ||
| RankMap from_rank, | ||
| RankMap2 to_rank, | ||
| typename property_traits< RankMap >::value_type damping, | ||
| incidence_graph_tag) | ||
| { | ||
| using rank_type = typename property_traits< RankMap >::value_type; | ||
| rank_type l1_norm(0); // Computing the norm simultaneously avoids an extra summing iteration. | ||
|
|
||
| // Initialize the constant part of maps. | ||
| for (auto v : boost::make_iterator_range(vertices(g))) | ||
| { | ||
| auto v_constant = rank_type(1 - damping) * get(personalization_map, v); | ||
| put(to_rank, v, v_constant); | ||
| l1_norm += v_constant; | ||
| } | ||
|
|
||
| // Accumulate from neighbors. | ||
| for (auto u : boost::make_iterator_range(vertices(g))) | ||
| { | ||
| rank_type u_rank_factor = damping * get(from_rank, u); | ||
| rank_type l1_accumulated_norm(0); // TBD: Consider making l1_norm volatile to reduce accumulation errors. | ||
| for (auto e : boost::make_iterator_range(out_edges(u, g))) | ||
| { | ||
| auto v = target(e, g); | ||
| rank_type u_rank_out = get(weight_map, e)*u_rank_factor; | ||
| put(to_rank, v, get(to_rank, v) + u_rank_out); | ||
| l1_accumulated_norm += u_rank_out; | ||
| } | ||
| l1_norm += l1_accumulated_norm; | ||
| } | ||
| // If there are negative edge weights, or if negative damping is used, l1_norm could be zero or near-zero. | ||
| // Division in those cases is conceptually correct for floating point weights, and actually expected behavior. | ||
| // That said, such edge cases are impossible to arise for all typical algorithm uses. | ||
| for (auto v : boost::make_iterator_range(vertices(g))) | ||
| put(to_rank, v, get(to_rank, v)/l1_norm); | ||
| } | ||
|
|
||
| template < | ||
| typename Graph, | ||
| typename WeightMap, | ||
| typename PersonalizationMap, | ||
| typename RankMap, | ||
| typename RankMap2 > | ||
| void personalized_page_rank_step( | ||
| const Graph& g, | ||
| WeightMap weight_map, | ||
| PersonalizationMap personalization_map, | ||
| RankMap from_rank, | ||
| RankMap2 to_rank, | ||
| typename property_traits< RankMap >::value_type damping, | ||
| bidirectional_graph_tag) | ||
| { | ||
| using damping_type = typename property_traits< RankMap >::value_type; | ||
| damping_type l1_norm(0); // Computing the norm simultaneously avoids an extra summing iteration. | ||
| for (auto v : boost::make_iterator_range(vertices(g))) | ||
| { | ||
| damping_type rank(0); | ||
| for (auto e : boost::make_iterator_range(in_edges(v, g))) | ||
| rank += get(from_rank, source(e, g))*get(weight_map, e); | ||
| auto v_score = (damping_type(1) - damping) * get(personalization_map, v) + damping * rank; | ||
| put(to_rank, v, v_score); | ||
| l1_norm += v_score; | ||
| } | ||
| // See above function for potential division by zero comments. | ||
| for (auto v : boost::make_iterator_range(vertices(g))) | ||
| put(to_rank, v, get(to_rank, v)/l1_norm); | ||
| } | ||
| } // end namespace personalized_page_rank_detail | ||
|
|
||
| template < | ||
| typename Graph, | ||
| typename WeightMap, | ||
| typename PersonalizationMap, | ||
| typename RankMap, | ||
| typename Done, | ||
| typename RankMap2 > | ||
| Done personalized_page_rank( | ||
| const Graph& g, | ||
| WeightMap weight_map, | ||
| PersonalizationMap personalization_map, | ||
| RankMap rank_map, | ||
| Done done, | ||
| typename property_traits< RankMap >::value_type damping, | ||
| RankMap2 rank_map2 | ||
| BOOST_GRAPH_ENABLE_IF_MODELS_PARM(Graph, vertex_list_graph_tag)) | ||
| { | ||
| using rank_type = typename property_traits< PersonalizationMap >::value_type; | ||
| rank_type personalization_norm(0); | ||
| for (auto v : boost::make_iterator_range(vertices(g))) | ||
| personalization_norm += get(personalization_map, v); | ||
|
|
||
| // TBD: This implementation couples iterators when possible under reduced L1 cache invalidation assumptions, | ||
| // but this is not necessarily the case because we may be grabbing 2x memory lanes each time to write there. | ||
| // Could investigate which pattern is faster in the future. | ||
| for (auto v : boost::make_iterator_range(vertices(g))) | ||
| { | ||
| rank_type value = get(personalization_map, v)/personalization_norm; | ||
| put(personalization_map, v, value); | ||
| put(rank_map, v, value); | ||
| } | ||
|
|
||
| bool to_map_2 = true; | ||
| do | ||
| { | ||
| typedef typename graph_traits< Graph >::traversal_category category; | ||
| if (to_map_2) | ||
| personalized_page_rank_detail::personalized_page_rank_step(g, weight_map, personalization_map, rank_map, rank_map2, damping, category()); | ||
| else | ||
| personalized_page_rank_detail::personalized_page_rank_step(g, weight_map, personalization_map, rank_map2, rank_map, damping, category()); | ||
| to_map_2 = !to_map_2; | ||
| } | ||
| while ((to_map_2 && !done(rank_map, rank_map2, g)) || (!to_map_2 && !done(rank_map2, rank_map, g))); // Done may not be symmetric. | ||
|
|
||
| // Now multiply the result with personalization_norm to restore the order of magnitude and store it in rank_map. | ||
| // Also restore the original personalization_map's magnitude for reuse (this is lossy up to numerical tolerance | ||
| // but leaner than making a copy). | ||
| if (!to_map_2) | ||
| { | ||
| for (auto v : boost::make_iterator_range(vertices(g))) | ||
| { | ||
| put(rank_map, v, get(rank_map2, v)*personalization_norm); | ||
| put(personalization_map, v, get(personalization_map, v)*personalization_norm); | ||
| } | ||
| } | ||
| else | ||
| { | ||
| for (auto v : boost::make_iterator_range(vertices(g))) | ||
| { | ||
| put(rank_map, v, get(rank_map, v)*personalization_norm); | ||
| put(personalization_map, v, get(personalization_map, v)*personalization_norm); | ||
| } | ||
| } | ||
| return done; | ||
| } | ||
|
|
||
| template < | ||
| typename Graph, | ||
| typename WeightMap, | ||
| typename PersonalizationMap, | ||
| typename RankMap, | ||
| typename Done > | ||
| Done personalized_page_rank( | ||
| const Graph& g, | ||
| WeightMap weight_map, | ||
| PersonalizationMap personalization_map, | ||
| RankMap rank_map, | ||
| Done done, | ||
| typename property_traits< RankMap >::value_type damping) | ||
| { | ||
| using rank_type = typename property_traits< RankMap >::value_type; | ||
| std::vector< rank_type > ranks2(num_vertices(g)); | ||
| return personalized_page_rank(g, weight_map, personalization_map, rank_map, done, damping, | ||
| make_iterator_property_map(ranks2.begin(), get(vertex_index, g))); | ||
| } | ||
|
|
||
| template < typename Graph, typename PersonalizationMap, typename RankMap > | ||
| rank_convergence personalized_page_rank( | ||
| const Graph& g, | ||
| PersonalizationMap personalization_map, | ||
| RankMap rank_map, | ||
| typename property_traits< RankMap >::value_type damping=0.85) | ||
| { | ||
| // This is the most traditional personalized PageRank implementation, with minimized signature. | ||
| using Edge = typename graph_traits<Graph>::edge_descriptor; | ||
| using rank_type = typename property_traits< RankMap >::value_type; | ||
| std::vector< rank_type > ranks2(num_vertices(g)); | ||
| auto markovian_weights = make_function_property_map<Edge, double>([&g](Edge e){ return 1.0 / out_degree(source(e, g), g); }); | ||
| return personalized_page_rank(g, | ||
| markovian_weights, | ||
| personalization_map, | ||
| rank_map, | ||
| rank_convergence(100, 1.E-9), | ||
| damping, | ||
| make_iterator_property_map(ranks2.begin(), get(vertex_index, g))); | ||
| } | ||
|
|
||
| } | ||
| } // end namespace boost::graph | ||
|
|
||
| #endif // BOOST_GRAPH_PERSONALIZED_PAGE_RANK_HPP | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
|
|
||
| #include <boost/graph/adjacency_list.hpp> | ||
| #include <boost/graph/personalized_page_rank.hpp> | ||
| #include <boost/property_map/property_map.hpp> | ||
| #include <iostream> | ||
| #include <vector> | ||
| #include <iomanip> | ||
|
|
||
| int main(int, char*[]) { | ||
| using namespace boost; | ||
| // deliberately hard (slow-converging) graph | ||
| using Graph = adjacency_list<vecS, vecS, directedS>; | ||
| std::vector<std::pair<int,int>> edges = { | ||
| {0,1},{1,0},{1,2},{2,1},{2,3},{3,2}, | ||
| {4,5},{5,4},{5,6},{6,5},{6,7},{7,6},{7,8},{8,7},{8,9},{9,8},{9,10},{10,9}, | ||
| {0,3},{3,0},{1,3},{3,1},{1,4},{4,1}, | ||
| {4,6},{6,4},{6,9},{9,6},{6,8},{8,6},{7,9},{9,7},{8,10},{10,8}, | ||
| {11,10},{10,11},{10,12},{12,10} | ||
| }; | ||
| Graph g(edges.begin(), edges.end(), 13); | ||
|
|
||
| std::vector<double> ranks(num_vertices(g)); | ||
| auto rank_map = make_iterator_property_map(ranks.begin(), get(vertex_index, g)); | ||
| std::vector<double> personalization(num_vertices(g)); | ||
| auto personalization_map = make_iterator_property_map(personalization.begin(), get(vertex_index, g)); | ||
| personalization[0] = 1; | ||
| personalization[1] = 1; | ||
| personalization[2] = 1; | ||
| personalization[3] = 1; | ||
|
|
||
| std::size_t max_iters(100); // Convergence is so bad in this graph that it needs such a high cap. | ||
| using Edge = graph_traits<Graph>::edge_descriptor; | ||
| auto weight = make_function_property_map<Edge, double>( | ||
| [&g](Edge e){ return 1.0 / std::sqrt(double(out_degree(source(e, g), g) * out_degree(target(e, g), g))); }); | ||
| auto convergence1 = graph::rank_convergence(max_iters, 1.E-9); | ||
| convergence1 = graph::personalized_page_rank(g, weight, personalization_map, rank_map, convergence1, 0.9); | ||
|
|
||
| std::cout << "ended after "<<max_iters-convergence1.iters << " iterations\n"; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For last round of review: please refrain from using standard output in tests. Standard outputs are brittle (can't be automatically checked in CI) and make poor non-regression tools.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That being said, for a later documentation PR, the
PS: yes this should go to the contribution guidelines, I just added an appendix section about it in #495 : thank you 🙏🏽 |
||
| std::cout << std::fixed << std::setprecision(4); | ||
| for (std::size_t v = 0; v < num_vertices(g); ++v) | ||
| { | ||
| std::cout << "vertex " << v << " rank=" << ranks[v] << "\n"; | ||
| } | ||
| BOOST_ASSERT(ranks[0]==ranks[2]); // should be exactly equal due to symmetry, even when considering floating point coarseness | ||
| BOOST_ASSERT(ranks[0]<ranks[1]+0.1); | ||
| BOOST_ASSERT(ranks[8]==ranks[9]); | ||
| BOOST_ASSERT(ranks[11]==ranks[12]); | ||
| BOOST_ASSERT(ranks[11]>0); | ||
| BOOST_ASSERT(convergence1.iters<max_iters); | ||
|
|
||
| // COMPUTING A HIGH-PASS VERSION BY PASSING NEGATIVE DAMPING (resulting values can be negative) | ||
| // This is not completely correct yet for computing graph gradients, because it needs to eskew normalization, | ||
| // so we need to be able to customize said normalization. | ||
| // Damping should generaly be in the range [-1,1]. | ||
|
Becheler marked this conversation as resolved.
|
||
| auto renorm_weight = make_function_property_map<Edge, double>( | ||
| [&g](Edge e){ return 1.0 / std::sqrt(double((1.0+out_degree(source(e, g), g)) * (1.0+out_degree(target(e, g), g)))); }); | ||
| auto convergence2 = graph::rank_convergence(max_iters, 1.E-9); | ||
| convergence2 = graph::personalized_page_rank(g, renorm_weight, personalization_map, rank_map, convergence2, -0.8); | ||
| std::cout << "ended after "<<max_iters-convergence2.iters << " iterations\n"; | ||
| std::cout << std::fixed << std::setprecision(4); | ||
| for (std::size_t v = 0; v < num_vertices(g); ++v) | ||
| { | ||
| std::cout << "vertex " << v << " flow=" << ranks[v] << "\n"; | ||
| } | ||
| BOOST_ASSERT(ranks[0]==ranks[2]); // should be exactly equal due to symmetry, even when considering floating point coarseness | ||
| BOOST_ASSERT(ranks[0]>ranks[1]+0.1); | ||
| BOOST_ASSERT(ranks[8]==ranks[9]); | ||
| BOOST_ASSERT(ranks[11]==ranks[12]); | ||
| BOOST_ASSERT(ranks[11]<0); | ||
| BOOST_ASSERT(convergence2.iters<max_iters); | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.