View Javadoc

1   /*
2    * Copyright (c) 2003, the JUNG Project and the Regents of the University 
3    * of California
4    * All rights reserved.
5    *
6    * This software is open-source under the BSD license; see either
7    * "license.txt" or
8    * http://jung.sourceforge.net/license.txt for a description.
9    */
10  package edu.uci.ics.jung.algorithms.metrics;
11  
12  import java.util.ArrayList;
13  import java.util.HashSet;
14  import java.util.List;
15  import java.util.Set;
16  
17  import org.apache.commons.collections15.CollectionUtils;
18  
19  import edu.uci.ics.jung.graph.DirectedGraph;
20  import edu.uci.ics.jung.graph.Graph;
21  
22  
23  /**
24   * TriadicCensus is a standard social network tool that counts, for each of the 
25   * different possible configurations of three vertices, the number of times
26   * that that configuration occurs in the given graph.
27   * This may then be compared to the set of expected counts for this particular
28   * graph or to an expected sample. This is often used in p* modeling.
29   * <p>
30   * To use this class, 
31   * <pre>
32   * long[] triad_counts = TriadicCensus(dg);
33   * </pre>
34   * where <code>dg</code> is a <code>DirectedGraph</code>.
35   * ith element of the array (for i in [1,16]) is the number of 
36   * occurrences of the corresponding triad type.
37   * (The 0th element is not meaningful; this array is effectively 1-based.)
38   * To get the name of the ith triad (e.g. "003"), 
39   * look at the global constant array c.TRIAD_NAMES[i]
40   * <p>
41   * Triads are named as 
42   * (number of pairs that are mutually tied)
43   * (number of pairs that are one-way tied)
44   * (number of non-tied pairs)
45   * in the triple. Since there are be only three pairs, there is a finite
46   * set of these possible triads.
47   * <p>
48   * In fact, there are exactly 16, conventionally sorted by the number of 
49   * realized edges in the triad:
50   * <table>
51   * <tr><th>Number</th> <th>Configuration</th> <th>Notes</th></tr>
52   * <tr><td>1</td><td>003</td><td>The empty triad</td></tr>
53   * <tr><td>2</td><td>012</td><td></td></tr>
54   * <tr><td>3</td><td>102</td><td></td></tr>
55   * <tr><td>4</td><td>021D</td><td>"Down": the directed edges point away</td></tr>
56   * <tr><td>5</td><td>021U</td><td>"Up": the directed edges meet</td></tr>
57   * <tr><td>6</td><td>021C</td><td>"Circle": one in, one out</td></tr>
58   * <tr><td>7</td><td>111D</td><td>"Down": 021D but one edge is mutual</td></tr>
59   * <tr><td>8</td><td>111U</td><td>"Up": 021U but one edge is mutual</td></tr>
60   * <tr><td>9</td><td>030T</td><td>"Transitive": two point to the same vertex</td></tr>
61   * <tr><td>10</td><td>030C</td><td>"Circle": A->B->C->A</td></tr>
62   * <tr><td>11</td><td>201</td><td></td></tr>
63   * <tr><td>12</td><td>120D</td><td>"Down": 021D but the third edge is mutual</td></tr>
64   * <tr><td>13</td><td>120U</td><td>"Up": 021U but the third edge is mutual</td></tr>
65   * <tr><td>14</td><td>120C</td><td>"Circle": 021C but the third edge is mutual</td></tr>
66   * <tr><td>15</td><td>210</td><td></td></tr>
67   * <tr><td>16</td><td>300</td><td>The complete</td></tr>
68   * </table>
69   * <p>
70   * This implementation takes O( m ), m is the number of edges in the graph. 
71   * <br>
72   * It is based on 
73   * <a href="http://vlado.fmf.uni-lj.si/pub/networks/doc/triads/triads.pdf">
74   * A subquadratic triad census algorithm for large sparse networks 
75   * with small maximum degree</a>
76   * Vladimir Batagelj and Andrej Mrvar, University of Ljubljana
77   * Published in Social Networks.
78   * @author Danyel Fisher
79   * @author Tom Nelson - converted to jung2
80   *
81   */
82  public class TriadicCensus {
83  
84  	// NOTE THAT THIS RETURNS STANDARD 1-16 COUNT!
85  
86  	// and their types
87  	public static final String[] TRIAD_NAMES = { "N/A", "003", "012", "102", "021D",
88  			"021U", "021C", "111D", "111U", "030T", "030C", "201", "120D",
89  			"120U", "120C", "210", "300" };
90  
91  	public static final int MAX_TRIADS = TRIAD_NAMES.length;
92  
93  	/**
94       * Returns an array whose ith element (for i in [1,16]) is the number of 
95       * occurrences of the corresponding triad type in <code>g</code>.
96       * (The 0th element is not meaningful; this array is effectively 1-based.)
97  	 * 
98  	 * @param g
99  	 */
100     public static <V,E> long[] getCounts(DirectedGraph<V,E> g) {
101         long[] count = new long[MAX_TRIADS];
102 
103         List<V> id = new ArrayList<V>(g.getVertices());
104 
105 		// apply algorithm to each edge, one at at time
106 		for (int i_v = 0; i_v < g.getVertexCount(); i_v++) {
107 			V v = id.get(i_v);
108 			for(V u : g.getNeighbors(v)) {
109 				int triType = -1;
110 				if (id.indexOf(u) <= i_v)
111 					continue;
112 				Set<V> neighbors = new HashSet<V>(CollectionUtils.union(g.getNeighbors(u), g.getNeighbors(v)));
113 				neighbors.remove(u);
114 				neighbors.remove(v);
115 				if (g.isSuccessor(v,u) && g.isSuccessor(u,v)) {
116 					triType = 3;
117 				} else {
118 					triType = 2;
119 				}
120 				count[triType] += g.getVertexCount() - neighbors.size() - 2;
121 				for (V w : neighbors) {
122 					if (shouldCount(g, id, u, v, w)) {
123 						count [ triType ( triCode(g, u, v, w) ) ] ++;
124 					}
125 				}
126 			}
127 		}
128 		int sum = 0;
129 		for (int i = 2; i <= 16; i++) {
130 			sum += count[i];
131 		}
132 		int n = g.getVertexCount();
133 		count[1] = n * (n-1) * (n-2) / 6 - sum;
134 		return count;		
135 	}
136 
137 	/**
138 	 * This is the core of the technique in the paper. Returns an int from 0 to
139 	 * 65 based on: WU -> 32 UW -> 16 WV -> 8 VW -> 4 UV -> 2 VU -> 1
140 	 * 
141 	 */
142 	public static <V,E> int triCode(Graph<V,E> g, V u, V v, V w) {
143 		int i = 0;
144 		i += link(g, v, u ) ? 1 : 0;
145 		i += link(g, u, v ) ? 2 : 0;
146 		i += link(g, v, w ) ? 4 : 0;
147 		i += link(g, w, v ) ? 8 : 0;
148 		i += link(g, u, w ) ? 16 : 0;
149 		i += link(g, w, u ) ? 32 : 0;
150 		return i;
151 	}
152 
153 	protected static <V,E> boolean link(Graph<V,E> g, V a, V b) {
154 		return g.isPredecessor(b, a);
155 	}
156 	
157 	
158 	/**
159 	 * Simply returns the triCode. 
160 	 * @param triCode
161 	 * @return the string code associated with the numeric type
162 	 */
163 	public static int triType( int triCode ) {
164 		return codeToType[ triCode ];
165 	}
166 
167 	/**
168 	 * For debugging purposes, this is copied straight out of the paper which
169 	 * means that they refer to triad types 1-16.
170 	 */
171 	protected static final int[] codeToType = { 1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5, 7, 3, 8,
172 			7, 11, 2, 6, 4, 8, 5, 9, 9, 13, 6, 10, 9, 14, 7, 14, 12, 15, 2, 5,
173 			6, 7, 6, 9, 10, 14, 4, 9, 9, 12, 8, 13, 14, 15, 3, 7, 8, 11, 7, 12,
174 			14, 15, 8, 14, 13, 15, 11, 15, 15, 16 };
175 
176 	/**
177 	 * Make sure we have a canonical ordering: Returns true if u < w, or v < w <
178 	 * u and v doesn't link to w
179 	 * 
180 	 * @param id
181 	 * @param u
182 	 * @param v
183 	 * @param w
184 	 * @return true if u < w, or if v < w < u and v doesn't link to w; false otherwise
185 	 */
186 	protected static <V,E> boolean shouldCount(Graph<V,E> g, List<V> id, V u, V v, V w) {
187 		int i_u = id.indexOf(u);
188 		int i_w = id.indexOf(w);
189 		if (i_u < i_w)
190 			return true;
191 		int i_v = id.indexOf(v);
192 		if ((i_v < i_w) && (i_w < i_u) && (!g.isNeighbor(w,v)))
193 			return true;
194 		return false;
195 	}
196 }