1 /*
2 * Copyright (c) 2003, the JUNG Project and the Regents of the University
3 * of California
4 * All rights reserved.
5 *
6 * This software is open-source under the BSD license; see either
7 * "license.txt" or
8 * http://jung.sourceforge.net/license.txt for a description.
9 */
10 package edu.uci.ics.jung.algorithms.metrics;
11
12 import java.util.ArrayList;
13 import java.util.HashSet;
14 import java.util.List;
15 import java.util.Set;
16
17 import org.apache.commons.collections15.CollectionUtils;
18
19 import edu.uci.ics.jung.graph.DirectedGraph;
20 import edu.uci.ics.jung.graph.Graph;
21
22
23 /**
24 * TriadicCensus is a standard social network tool that counts, for each of the
25 * different possible configurations of three vertices, the number of times
26 * that that configuration occurs in the given graph.
27 * This may then be compared to the set of expected counts for this particular
28 * graph or to an expected sample. This is often used in p* modeling.
29 * <p>
30 * To use this class,
31 * <pre>
32 * long[] triad_counts = TriadicCensus(dg);
33 * </pre>
34 * where <code>dg</code> is a <code>DirectedGraph</code>.
35 * ith element of the array (for i in [1,16]) is the number of
36 * occurrences of the corresponding triad type.
37 * (The 0th element is not meaningful; this array is effectively 1-based.)
38 * To get the name of the ith triad (e.g. "003"),
39 * look at the global constant array c.TRIAD_NAMES[i]
40 * <p>
41 * Triads are named as
42 * (number of pairs that are mutually tied)
43 * (number of pairs that are one-way tied)
44 * (number of non-tied pairs)
45 * in the triple. Since there are be only three pairs, there is a finite
46 * set of these possible triads.
47 * <p>
48 * In fact, there are exactly 16, conventionally sorted by the number of
49 * realized edges in the triad:
50 * <table>
51 * <tr><th>Number</th> <th>Configuration</th> <th>Notes</th></tr>
52 * <tr><td>1</td><td>003</td><td>The empty triad</td></tr>
53 * <tr><td>2</td><td>012</td><td></td></tr>
54 * <tr><td>3</td><td>102</td><td></td></tr>
55 * <tr><td>4</td><td>021D</td><td>"Down": the directed edges point away</td></tr>
56 * <tr><td>5</td><td>021U</td><td>"Up": the directed edges meet</td></tr>
57 * <tr><td>6</td><td>021C</td><td>"Circle": one in, one out</td></tr>
58 * <tr><td>7</td><td>111D</td><td>"Down": 021D but one edge is mutual</td></tr>
59 * <tr><td>8</td><td>111U</td><td>"Up": 021U but one edge is mutual</td></tr>
60 * <tr><td>9</td><td>030T</td><td>"Transitive": two point to the same vertex</td></tr>
61 * <tr><td>10</td><td>030C</td><td>"Circle": A->B->C->A</td></tr>
62 * <tr><td>11</td><td>201</td><td></td></tr>
63 * <tr><td>12</td><td>120D</td><td>"Down": 021D but the third edge is mutual</td></tr>
64 * <tr><td>13</td><td>120U</td><td>"Up": 021U but the third edge is mutual</td></tr>
65 * <tr><td>14</td><td>120C</td><td>"Circle": 021C but the third edge is mutual</td></tr>
66 * <tr><td>15</td><td>210</td><td></td></tr>
67 * <tr><td>16</td><td>300</td><td>The complete</td></tr>
68 * </table>
69 * <p>
70 * This implementation takes O( m ), m is the number of edges in the graph.
71 * <br>
72 * It is based on
73 * <a href="http://vlado.fmf.uni-lj.si/pub/networks/doc/triads/triads.pdf">
74 * A subquadratic triad census algorithm for large sparse networks
75 * with small maximum degree</a>
76 * Vladimir Batagelj and Andrej Mrvar, University of Ljubljana
77 * Published in Social Networks.
78 * @author Danyel Fisher
79 * @author Tom Nelson - converted to jung2
80 *
81 */
82 public class TriadicCensus {
83
84 // NOTE THAT THIS RETURNS STANDARD 1-16 COUNT!
85
86 // and their types
87 public static final String[] TRIAD_NAMES = { "N/A", "003", "012", "102", "021D",
88 "021U", "021C", "111D", "111U", "030T", "030C", "201", "120D",
89 "120U", "120C", "210", "300" };
90
91 public static final int MAX_TRIADS = TRIAD_NAMES.length;
92
93 /**
94 * Returns an array whose ith element (for i in [1,16]) is the number of
95 * occurrences of the corresponding triad type in <code>g</code>.
96 * (The 0th element is not meaningful; this array is effectively 1-based.)
97 *
98 * @param g
99 */
100 public static <V,E> long[] getCounts(DirectedGraph<V,E> g) {
101 long[] count = new long[MAX_TRIADS];
102
103 List<V> id = new ArrayList<V>(g.getVertices());
104
105 // apply algorithm to each edge, one at at time
106 for (int i_v = 0; i_v < g.getVertexCount(); i_v++) {
107 V v = id.get(i_v);
108 for(V u : g.getNeighbors(v)) {
109 int triType = -1;
110 if (id.indexOf(u) <= i_v)
111 continue;
112 Set<V> neighbors = new HashSet<V>(CollectionUtils.union(g.getNeighbors(u), g.getNeighbors(v)));
113 neighbors.remove(u);
114 neighbors.remove(v);
115 if (g.isSuccessor(v,u) && g.isSuccessor(u,v)) {
116 triType = 3;
117 } else {
118 triType = 2;
119 }
120 count[triType] += g.getVertexCount() - neighbors.size() - 2;
121 for (V w : neighbors) {
122 if (shouldCount(g, id, u, v, w)) {
123 count [ triType ( triCode(g, u, v, w) ) ] ++;
124 }
125 }
126 }
127 }
128 int sum = 0;
129 for (int i = 2; i <= 16; i++) {
130 sum += count[i];
131 }
132 int n = g.getVertexCount();
133 count[1] = n * (n-1) * (n-2) / 6 - sum;
134 return count;
135 }
136
137 /**
138 * This is the core of the technique in the paper. Returns an int from 0 to
139 * 65 based on: WU -> 32 UW -> 16 WV -> 8 VW -> 4 UV -> 2 VU -> 1
140 *
141 */
142 public static <V,E> int triCode(Graph<V,E> g, V u, V v, V w) {
143 int i = 0;
144 i += link(g, v, u ) ? 1 : 0;
145 i += link(g, u, v ) ? 2 : 0;
146 i += link(g, v, w ) ? 4 : 0;
147 i += link(g, w, v ) ? 8 : 0;
148 i += link(g, u, w ) ? 16 : 0;
149 i += link(g, w, u ) ? 32 : 0;
150 return i;
151 }
152
153 protected static <V,E> boolean link(Graph<V,E> g, V a, V b) {
154 return g.isPredecessor(b, a);
155 }
156
157
158 /**
159 * Simply returns the triCode.
160 * @param triCode
161 * @return the string code associated with the numeric type
162 */
163 public static int triType( int triCode ) {
164 return codeToType[ triCode ];
165 }
166
167 /**
168 * For debugging purposes, this is copied straight out of the paper which
169 * means that they refer to triad types 1-16.
170 */
171 protected static final int[] codeToType = { 1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8,
172 7, 11, 2, 6, 4, 8, 5, 9, 9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5,
173 6, 7, 6, 9, 10, 14, 4, 9, 9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12,
174 14, 15, 8, 14, 13, 15, 11, 15, 15, 16 };
175
176 /**
177 * Make sure we have a canonical ordering: Returns true if u < w, or v < w <
178 * u and v doesn't link to w
179 *
180 * @param id
181 * @param u
182 * @param v
183 * @param w
184 * @return true if u < w, or if v < w < u and v doesn't link to w; false otherwise
185 */
186 protected static <V,E> boolean shouldCount(Graph<V,E> g, List<V> id, V u, V v, V w) {
187 int i_u = id.indexOf(u);
188 int i_w = id.indexOf(w);
189 if (i_u < i_w)
190 return true;
191 int i_v = id.indexOf(v);
192 if ((i_v < i_w) && (i_w < i_u) && (!g.isNeighbor(w,v)))
193 return true;
194 return false;
195 }
196 }