"c7fbfa183b601f4d393a43644dae11e5f644db2a18c747865db1ca922e632e32", "open.getsession.org" => "a03c383cf63c3c4efe67acc52112a6dd734b3a946b9545f488aaa93da7991238" ); // run main function main(); function main() { echo("Running, please wait..." . PHP_EOL); echo("This script will take approximately 2 minutes to run." . PHP_EOL); $html = get_html_from_known_sources(); $wild_join_links = extract_join_links_from_html($html); $servers = get_servers_from_join_links($wild_join_links); $servers = reduce_servers($servers); $servers = merge_servers_with_known_good_servers($servers); $rooms = query_servers_for_rooms($servers); $pubkeys = acquire_pubkeys_from_join_links($wild_join_links); $pubkeys = merge_pubkeys_with_known_pubkeys($pubkeys); $addr_assignments = get_pubkeys_of_servers($servers, $pubkeys); $addr_assignments = reduce_addresses_of_pubkeys($addr_assignments); $room_assignments = assign_rooms_to_address_assignments($addr_assignments, $rooms); $final_join_links = generate_join_links($room_assignments); // print_r($servers); // print_r($rooms); // print_r($addr_assignments); // print_r($room_assignments); //TODO: We also assigned empty room arrays. Should probably be fixed // print_r($final_join_links); echo(count($final_join_links) . " unique Session Communities have been found." . PHP_EOL); //TODO: What about room view links? $table_html = get_table_html($room_assignments); $final_html = create_html_page_from_table($table_html, "Session Communities"); //echo($final_html); } /* * Queries following known sources of join links for Session Communities: * - Awesome Session Open Group List repository on GithUb * - LokiLocker.com Open Groups * - https://session.directory open groups */ function get_html_from_known_sources() { // known open group / community lists $asgl = "https://github.com/GNU-Linux-libre/Awesome-Session-Group-List/raw/main/README.md"; $ll = "https://lokilocker.com/Mods/Session-Groups/wiki/Session-Open-Groups"; $sd_pre = "https://session.directory/?all=groups" ; // this one has to be expanded first // get awesome session group list html $asgl_html = file_get_contents($asgl); // get lokilocker.com html $ll_html = file_get_contents($ll); // get session.directory html $sd_html = ""; $sd_pre_html = file_get_contents($sd_pre); $sd_pattern = "/view_session_group_user_lokinet\.php\?id=\d+/"; preg_match_all($sd_pattern, $sd_pre_html, $sd_links); $sd_links = $sd_links[0]; // don't know why foreach ($sd_links as &$link) { // add prefix "https://session.directory to the sd_links $link = str_replace('view_session_group_user_lokinet.php?id=', 'https://session.directory/view_session_group_user_lokinet.php?id=', $link); // add html to sd_html $sd_html = $sd_html . file_get_contents($link); } // merge all html into a single string return( $asgl_html . PHP_EOL . $ll_html . PHP_EOL . $sd_html ); } /* * Extracts all links that match the $room_join_regex * Example: http(s)://whatever:port/?public_key=0123456789abcef * Result is sorted and unique * There's no check for reachability or additional https availability */ function extract_join_links_from_html($html){ global $room_join_regex; $result = array(); preg_match_all($room_join_regex, $html, $result); $result = $result[0]; // there's only $result[0], no $result[1] or others // filter $result[0] because some entries look like this: //[106] => http://sog.caliban.org/im?public_key=118df8c6c471ac0468c7c77e1cdc12f24a139ee8a07c6e3bf4e7855640dad821" rel="nofollow">http://sog.caliban.org/im?public_key=118df8c6c471ac0468c7c77e1cdc12f24a139ee8a07c6e3bf4e7855640dad821 //TODO: Figure out why the regex does match those foreach($result as &$entry) { if(str_contains($entry, "\"")) { $entry = explode("\"", $entry)[0]; // split on " and take first part } } $result = array_unique($result); sort($result); return $result; } /* * Gets all servers from an array of join links * Returns an array that looks like this: * [0] => 1.2.3.4 * [1] => 2.3.4.5:12345 * [2] => example.com * [3] => dev.test:23456 * Result is sorted and unique */ function get_servers_from_join_links($join_links_arr) { $result = array(); foreach($join_links_arr as $join_link){ $split = array(); $split = explode("/", $join_link); // http(s): + "" + 1.2.3.4:56789 + "name?public_key=0123456789abcdef" $result[] = $split[2]; // 1.2.3.4:56789 } $result = array_unique($result); sort($result); return $result; } /* * Checks whether servers are reachable and whether they support https * and makes sure that there are no http/https duplicates * Input is an array of servers without protocol (no http:(s)// in front) * Result is unique and sorted */ function reduce_servers($servers_arr) { $reduced_servers = array(); $offline_servers = array(); // debug foreach($servers_arr as $server) { // try https $url = "https://" . $server; if(url_is_reachable($url)){ $reduced_servers[] = $url; } else{ // try http $url = "http://" . $server; if(url_is_reachable($url)){ $reduced_servers[] = $url; } else { $offline_servers[] = $url; // echo("Server " . $server . " is not reachable" . PHP_EOL); } } } $reduced_servers = array_unique($reduced_servers); sort($reduced_servers); return $reduced_servers; } /* * Some servers don't appear in the wild yet, but can be queried * Ideally this shouldn't be necessary, but it is for now * Should be called after reduce_servers() */ function merge_servers_with_known_good_servers($url_arr){ $result = array(); global $known_servers; $result = array_merge($url_arr, $known_servers); $result = array_unique($result); // just in case we accidentally add a duplicate sort($result); return $result; } /* * Helper function for reduce_servers */ function url_is_reachable($url) { $ch = curl_init($url); curl_setopt($ch, CURLOPT_NOBODY, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_TIMEOUT_MS, 1500); // 1500ms or 1.5s curl_exec($ch); $retcode = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); if ($retcode == 200) { return true; } else { return false; } } /* * Takes an input like this: * [0] => http://1.2.3.4 * [1] => https://2.3.4.5:12345 * [2] => https://example.com * [3] => http://dev.test:23456 * and queries the /room JSON API endpoint * Returns a multidimensional array * The first dimension uses the server URL as public_key * The second dimension is an array that contains $room_array array * $room_array arrays contain token, name, users and description */ function query_servers_for_rooms($url_arr){ $rooms = array(); $endpoint = "/rooms"; $failed = array(); // debug // we can't use array_unique later so we make sure the input is unique $url_arr = array_unique($url_arr); // not really necessary though // we can't use sort or asort later so me do it now sort($url_arr); // not really necessary though foreach($url_arr as $url) { $json_url = $url . $endpoint; $json = file_get_contents($json_url); if($json) { $json_obj = json_decode($json); $server_rooms = array(); // if response was not empty if($json_obj) { foreach($json_obj as $json_room) { $token = $json_room->token; // room "name" $room_array = array( "token" => $token, "name" => $json_room->name, "active_users" => $json_room->active_users, "description" => $json_room->description ); //$server_rooms[] = $token; $server_rooms[$token] = $room_array; } //sort($server_rooms); $rooms[$url] = $server_rooms; } } else { // 404 - could mean it's a legacy server that doesn't provide /room endpoint $failed[] = $url; $legacy_rooms = query_homepage_for_rooms($url); if($legacy_rooms) { $rooms[$url] = $legacy_rooms; } } } // print_r($failed); return $rooms; } /* * For servers that do not provide the /rooms endpoint * Takes same input as query_api_for_rooms(), but only singular URL * Returns array of all available rooms (each its own array with token, name, users and description) * Result is false if no rooms where found */ function query_homepage_for_rooms($url) { $result = array(); global $room_token_regex_part; $contents = file_get_contents($url); if($contents) { $regex_new = "/\/r\/" . $room_token_regex_part . "/"; $regex_old = "/\/view\/room\/" . $room_token_regex_part . "/"; preg_match_all($regex_new, $contents, $rooms); $rooms = $rooms[0]; // if the new regex doesn't match, use the old one if(empty($rooms)) { preg_match_all($regex_old, $contents, $rooms); $rooms = $rooms[0]; } // if one of the two regex has found anything if(!empty($rooms)) { // we also want the room names (not tokens) preg_match_all('/
Identifier | " . PHP_EOL . "Name | " . PHP_EOL . "Description | " . PHP_EOL . "Users | " . PHP_EOL . "Join URL | " . PHP_EOL . "
---|