From bfa64118fa467007c3c6623ca398d31290bb097b Mon Sep 17 00:00:00 2001 From: gravel Date: Wed, 19 Apr 2023 14:35:26 +0000 Subject: [PATCH] Add CommunityTag & parse ASGL tags --- output/styles2.css | 5 +- php/utils/servers-rooms.php | 51 ++-- php/utils/sources.php | 326 ++++++++++++++++++++------ php/utils/tags.php | 124 ++++++++++ php/utils/utils.php | 15 +- sites/+components/tbl_communities.php | 6 +- 6 files changed, 414 insertions(+), 113 deletions(-) create mode 100644 php/utils/tags.php diff --git a/output/styles2.css b/output/styles2.css index 52d3e3d..2ab3d3c 100644 --- a/output/styles2.css +++ b/output/styles2.css @@ -70,10 +70,13 @@ html:not(.js) .js-only { margin-inline: 0.2em; border: #0004 outset 1px; /* box-shadow: 0.05em 0.05em 0.1em 0 #4444;*/ +} + +.room-label-user { background-color: greenyellow; } -.badge-special { +.room-label-reserved { background-color: yellow; } diff --git a/php/utils/servers-rooms.php b/php/utils/servers-rooms.php index 6b45c0a..04b586e 100644 --- a/php/utils/servers-rooms.php +++ b/php/utils/servers-rooms.php @@ -2,6 +2,7 @@ include_once "$PROJECT_ROOT/languages/language_flags.php"; include_once "$PROJECT_ROOT/php/servers/known-servers.php"; + include_once 'tags.php'; $MINUTE_SECONDS = 60; $HOUR_SECONDS = 60 * $MINUTE_SECONDS; @@ -74,7 +75,7 @@ * * Custom attribute. */ - public array $tags; + private array $tags = []; private function __construct(\CommunityServer $server, array $details) { global $languages; @@ -97,11 +98,6 @@ $room_identifier = $this->get_room_identifier(); - /** @var string[] */ - $this->tags = isset($details['tags']) - ? $details['tags'] - : []; - $this->language_flag = isset($languages[$room_identifier]) ? 
$languages[$room_identifier] @@ -122,7 +118,12 @@ * @param CommunityServer $server */ public static function from_details($server, array $details) { - return new CommunityRoom($server, $details); + $room = new CommunityRoom($server, $details); + $has_tags = isset($details['tags']); + if ($has_tags) { + $room->tags = CommunityTag::from_details_array($details['tags']); + } + return $room; } /** @@ -203,7 +204,6 @@ } return floor($active_users_cutoff) . 's'; - } /** @@ -260,41 +260,33 @@ } /** - * @var string[] RESERVED_TAGS - * Array of derived tags unavailable for manual tagging. - */ - const RESERVED_TAGS = ["official"]; - - /** - * Checks whether the given manual tag can be accepted. + * Checks whether this room belongs to a Session-owned server. */ - static function is_reserved_tag(string $tag): bool { - return in_array(strtolower($tag), CommunityRoom::RESERVED_TAGS); + function is_official_room(): bool { + return $this->server->is_official_server(); } /** - * Checks whether this room belongs to a Session-owned server. + * @param string[] $tags */ - function is_official_room(): bool { - return $this->server->is_official_server(); + function add_tags(array $tags) { + $this->tags = [...$this->tags, ...$tags]; } /** * Return the tags associated with this room. - * @return string[] Tags as string array. + * @return \CommunityTag[] Tags as string array. 
*/ function get_room_tags(): array { - $user_tags = array_filter( - $this->tags, - function($tag) { - return !CommunityRoom::is_reserved_tag($tag); - } - ); + $user_tags = CommunityTag::from_user_tags($this->tags); + /** + * @var \CommunityTag[] $derived_tags + */ $derived_tags = []; if ($this->is_official_room()) { - $derived_tags[] = "official"; + $derived_tags[] = new CommunityTag("official", TagType::RESERVED_TAG); } return [...$derived_tags, ...$user_tags]; @@ -586,7 +578,7 @@ foreach ($servers as $server) { foreach ($server->rooms as $room) { $sourced_tags = $source->get_room_tags($room->get_room_identifier()); - $room->tags = [...$room->tags, ...$sourced_tags]; + $room->add_tags($sourced_tags); } } } @@ -793,7 +785,6 @@ // Check reachability before polling too much. if (count($this->room_hints) >= 2) { log_info("Checking reachability for $base_url first..."); - log_value($this->room_hints); if (!url_is_reachable($base_url, retries: $FAST_FETCH_MODE ? 1 : 4)) { log_warning("Reachability test failed by $base_url."); return false; diff --git a/php/utils/sources.php b/php/utils/sources.php index 4c074b0..f7ed8bb 100644 --- a/php/utils/sources.php +++ b/php/utils/sources.php @@ -1,8 +1,194 @@ contents = $contents; + } + + /** + * Create new instance of this source from contents. + * Returns false if processing the source fails. + * @return \SDIRCommunitySource|false + */ + public static function from_contents(string $contents) { + $source = new SDIRCommunitySource($contents); + + if (!$source->sdir_process_tags()) { + return false; + } + + return $source; + } + + private readonly string $contents; + + /** + * @var string[][] $tags Array associating room IDs with string tag arrays. 
+ */ + private array $tags = []; // Start empty so get_tags() never reads an uninitialized typed property. + + private static function sdir_validate_entry( + array $room_entry, + bool &$missing_url, + bool &$missing_tags + ): bool { + if (!isset($room_entry['url']) || !is_string($room_entry['url'])) { + log_value($room_entry); + $missing_url = true; + return false; + } + + if (!isset($room_entry['tags']) || !is_string($room_entry['tags'])) { + log_value($room_entry); + $missing_tags = true; + return false; + } + + return true; + } + + private static function sdir_report_errors(bool $entry_missing_url, bool $entry_missing_tags) { + if ($entry_missing_url) { + log_error("One or more room entries from session.directory is missing the 'url' parameter."); + } + + if ($entry_missing_tags) { + log_error("One or more room entries from session.directory is missing the 'tags' parameter."); + } + } + + private function get_sdir_entries(): array|bool { + try { + return json_decode($this->contents, true, 512, JSON_THROW_ON_ERROR) ?? false; // A JSON null document decodes to null, which array|bool rejects. + } catch (JsonException) { + return false; + } + } + + private function sdir_process_tags(): bool { + $entry_missing_url = false; + $entry_missing_tags = false; + + $rooms = $this->get_sdir_entries(); // Instance method that reads $this->contents itself; it takes no argument. + + if (!is_array($rooms) || !$rooms) { // Scalar JSON documents cannot be iterated; an empty list is still reported as a failure. + log_error("Could not parse entries from session.directory."); + return false; + } + + foreach ($rooms as $room_entry) { + if (!SDIRCommunitySource::sdir_validate_entry( + $room_entry, $entry_missing_url, $entry_missing_tags + )) { + continue; + } + + $url = $room_entry['url']; + $tags = $room_entry['tags']; + + $room_id = url_get_room_id($url); + + $this->tags[$room_id] = explode(',', $tags); + } + + SDIRCommunitySource::sdir_report_errors($entry_missing_url, $entry_missing_tags); + + return true; + } + + /** + * @return string[][] Array associating room IDs with string tag arrays.
+ */ + public function get_tags(): array { + return $this->tags; + } + } + + class ASGLCommunitySource { + private function __construct(string $contents) { + $this->contents = $contents; + } + + private readonly string $contents; + + /** + * @var string[][] $tags; + */ + private array $tags = []; + + /** + * @return \ASGLCommunitySource|false + */ + public static function from_contents(string $contents) { + $source = new ASGLCommunitySource($contents); + + if(!$source->asgl_process_tags()) { + return false; + } + + return $source; + } + + private function asgl_process_tags(): bool { + $lines = explode("\n", $this->contents); + // $last_headings = []; + $last_room_id = null; + $room_tags = []; + + foreach ($lines as $line) { + ASGLCommunitySource::asgl_process_line($line, $last_room_id, $room_tags); + + if ($last_room_id != null && count($room_tags) > 0) { + $this->tags[$last_room_id] = $room_tags; + $last_room_id = null; + $room_tags = []; + } + } + + return true; + } + private static function asgl_process_line( + ?string $line, + ?string &$last_room_id, + array &$room_tags + ) { + $line = trim($line); + + if (strlen($line) == 0) { + $last_room_id = null; + return; + } + + $urls = parse_join_links($line); + + if (count($urls) == 1 && $urls[0] == $line) { + $last_room_id = url_get_room_id($urls[0]); + $room_tags = []; + return; + } + + if (str_starts_with($line, "hashtag")) { + $room_tags = ASGLCommunitySource::read_asgl_tags($line); + } + } + + private static function read_asgl_tags(string $line): array { + $matches = []; + preg_match_all('/`#([^`]+)`/', $line, $matches); + // Return first group matches. 
+ return $matches[1]; + } + + public function get_tags(): array { + + return $this->tags; + } + } + + class CommunitySources { private const SOURCES = array( 'ASGL' => 'https://raw.githubusercontent.com/GNU-Linux-libre/Awesome-Session-Group-List/main/README.md', 'LOKI' => 'https://lokilocker.com/Mods/Session-Groups/wiki/Session-Open-Groups', @@ -13,109 +199,93 @@ 'FARK' => 'https://freearkham.cc/' ); - private string $contents_aggregated = ""; - - private string $contents_sdir = ""; + private readonly string $contents_asgl; + private readonly string $contents_loki; + private readonly string $contents_sdir; + private readonly string $contents_fark; + private readonly string $contents_aggregated; - private array $room_tags; + /** + * Array associating room identifiers with arrays of raw tags. + * @var string[][] $room_tags + */ + private array $room_tags = []; /** * Fetches and saves known sources of Session Community join links. */ public function __construct() { - global $SOURCES; - log_info("Requesting Awesome Session Group list..."); - $contents_asgl = file_get_contents(CommunitySources::SOURCES['ASGL']); - log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK" + $this->contents_asgl = CommunitySources::fetch_source('ASGL'); log_info("Requesting Lokilocker Mods Open Group list..."); - $contents_loki = file_get_contents(CommunitySources::SOURCES['LOKI']); - log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK" + $this->contents_loki = CommunitySources::fetch_source('LOKI'); log_info("Requesting session.directory list..."); - $this->contents_sdir = file_get_contents(CommunitySources::SOURCES['SDIR-JSON']); - log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK" - - if (!$this->sdir_process_tags()) { - log_debug(truncate($this->contents_sdir, 50)); - log_error("Could not parse tags from session.directory."); - } + $this->contents_sdir = CommunitySources::fetch_source('SDIR-JSON'); log_info("Requesting
FreeArkham.cc list..."); - $contents_fark = file_get_contents(CommunitySources::SOURCES['FARK']); - log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK" + $this->contents_fark = CommunitySources::fetch_source('FARK'); - log_info('Done fetching sources.'); + log_info("Parsing extra information..."); + if (!$this->process_sources()) { + log_error("Could not parse extra information from one or more sources."); + } + log_info('Done fetching sources.'); - $this->contents_aggregated = $contents_asgl . $contents_fark . $contents_loki . $this->contents_sdir; + $this->contents_aggregated = + $this->contents_asgl . + $this->contents_fark . + $this->contents_loki . + $this->contents_sdir; } - private static function preprocess_tag(?string $tag) { - $tag = trim($tag); - - if (strlen($tag) == 0) { - return $tag; - } + private static function fetch_source(string $source_key) { + $url = CommunitySources::SOURCES[$source_key]; - $tag = html_sanitize(html_entity_decode($tag)); + $contents = file_get_contents($url); + log_debug($http_response_header[0] ?? "No HTTP response headers received."); // The variable is undefined when the request never got a response. - if ($tag[0] == '#') { - return substr($tag, 1); + if (!$contents) { + log_error("Could not fetch source from $url."); + return ""; } - return $tag; + return $contents; } - private function sdir_process_tags(): bool { - $entry_missing_url = false; - $entry_missing_tags = false; - - try { - $rooms = json_decode($this->contents_sdir, true, 512, JSON_THROW_ON_ERROR); - } catch (JsonException) { - return false; - } - - foreach ($rooms as $room_entry) { - // TODO: Check types - if (!isset($room_entry['url'])) { - log_value($room_entry); - $entry_missing_url = true; - continue; - } - if (!isset($room_entry['tags'])) { - log_value($room_entry); - $entry_missing_tags = true; - continue; + /** + * @param string[][] $tags Array associating room IDs to tag arrays + */ + private function add_tags(array $tags) { + foreach ($tags as $room_id => $room_tags) { + if (!isset($this->room_tags[$room_id])) { +
$this->room_tags[$room_id] = []; } - $url = $room_entry['url']; - $tags = explode(',', $room_entry['tags']); - $room_token = url_get_token($url); - $pubkey = url_get_pubkey($url); - $pubkey_4 = substr($pubkey, 0, 4); - $room_id = "$room_token+$pubkey_4"; - $tags = array_map(function(?string $tag) { - return CommunitySources::preprocess_tag($tag); - }, $tags); + $this->room_tags[$room_id] = [ + ...$this->room_tags[$room_id], + ...$room_tags + ]; + } + } - $tags = array_filter( - $tags, function(?string $tag) { - return strlen($tag) != 0; - } - ); + private function process_sources(): bool { + $source_sdir = SDIRCommunitySource::from_contents($this->contents_sdir); + + $source_asgl = ASGLCommunitySource::from_contents($this->contents_asgl); - $this->room_tags[$room_id] = $tags; - } + $source_sdir && $this->add_tags($source_sdir->get_tags()); - if ($entry_missing_url) { - log_error("One or more room entries from session.directory is missing the 'url' parameter."); + $source_asgl && $this->add_tags($source_asgl->get_tags()); + + if (!$source_sdir) { + return false; } - - if ($entry_missing_tags) { - log_error("One or more room entries from session.directory is missing the 'tags' parameter."); + + if (!$source_asgl) { + return false; } return true; @@ -125,7 +295,7 @@ * Returns all join URLs found. * @return string[] Join URLs. */ - function get_join_urls(): array { + public function get_join_urls(): array { return array_unique( parse_join_links($this->contents_aggregated) ); @@ -134,14 +304,14 @@ /** * Return known tags for the given room. * @param string $room_id Room identifier. - * @return string[] Array of string tags. + * @return \CommunityTag[] Array of string tags. 
*/ - function get_room_tags($room_id): array { + public function get_room_tags($room_id): array { if (!isset($this->room_tags[$room_id])) { return []; } - return array_slice($this->room_tags[$room_id], 0); + return $this->room_tags[$room_id]; } } ?> \ No newline at end of file diff --git a/php/utils/tags.php b/php/utils/tags.php new file mode 100644 index 0000000..8ac34de --- /dev/null +++ b/php/utils/tags.php @@ -0,0 +1,124 @@ +text = $text; + $this->type = $tag_type; + } + + public readonly int $type; + + public readonly string $text; + + /** + * Returns a lowercase representation of the tag for purposes of de-duping. + */ + public function __toString(): string { + return strtolower($this->text); + } + + public function jsonSerialize(): mixed { + if ($this->type != TagType::USER_TAG) { + throw new LogicException("Should not serialize derived tags."); + } + return $this->text; + } + + private static function preprocess_tag(?string $tag) { + $tag = trim($tag ?? ''); // Passing null to trim() is deprecated since PHP 8.1. + + if (strlen($tag) == 0) { + return $tag; + } + + $tag = html_sanitize(html_entity_decode($tag)); + + if (str_starts_with($tag, '#')) { // Safe if sanitising left an empty string, unlike reading $tag[0]. + return substr($tag, 1); + } + + return $tag; + } + + /** + * @param string[] $tag_array + * @return \CommunityTag[] + */ + private static function from_tag_array(array $tag_array) { + $tags = array_map(function(?string $tag) { + return CommunityTag::preprocess_tag($tag); + }, $tag_array); + + $tags = array_filter( + $tags, function(?string $tag) { + return strlen($tag) != 0; + } + ); + + return array_map(function(string $tag) { + return new CommunityTag($tag); + }, $tags); + } + + /** + * Returns the user tags given, without any reserved tags.
+ * @param string[] $tags + * @return \CommunityTag[] + */ + public static function from_user_tags(array $tags): array { + $tags_user = array_filter( + $tags, + function($tag) { + return !CommunityTag::is_reserved_tag($tag); + } + ); + + return CommunityTag::from_tag_array($tags_user); + } + + /** + * @param string[] $details_array Array of string tags. + * @return \CommunityTag[] + */ + public static function from_details_array(array $details_array): array { + return CommunityTag::from_user_tags($details_array); + } + + /** + * @param \CommunityTag[] $tags + * @return \CommunityTag[] + */ + public static function dedupe_tags(array $tags) { + return array_unique($tags); + } + + public function get_tag_type(): string { + return match($this->type) { + TagType::USER_TAG => 'user', + TagType::RESERVED_TAG => 'reserved', + TagType::WARNING_TAG => 'warning' + }; + } + /** + * @var string[] RESERVED_TAGS + * Array of derived tags unavailable for manual tagging. + */ + private const RESERVED_TAGS = ["official"]; + + /** + * Checks whether the given manual tag can be accepted. + */ + public static function is_reserved_tag(string $tag): bool { + return in_array(strtolower($tag), CommunityTag::RESERVED_TAGS); + } + + } +?> \ No newline at end of file diff --git a/php/utils/utils.php b/php/utils/utils.php index f2004f8..6c44df5 100644 --- a/php/utils/utils.php +++ b/php/utils/utils.php @@ -122,9 +122,10 @@ * to an unreachable host. */ function curl_get_response(string $url, int $retries, $stop_on_codes = [404], $curlopts = []) { + global $FAST_FETCH_MODE; // use separate timeouts to reliably get data from Chinese server with repeated tries $connecttimeout = 2; // wait at most X seconds to connect - $timeout = 3; // can't take longer than X seconds for the whole curl process + $timeout = $FAST_FETCH_MODE ? 
1.5 : 3; // can't take longer than X seconds for the whole curl process $sleep = 2; // sleep between tries in seconds // takes at most ($timeout + $sleep) * $retries seconds @@ -200,6 +201,18 @@ return $query_components['public_key']; } + /** + * Computes a room's ID from a join URL. + * @param string $join_url Join URL for Session Community. + * @return string Room identifier per our format. + */ + function url_get_room_id(string $join_url) { + $room_token = url_get_token($join_url); + $pubkey = url_get_pubkey($join_url); + $pubkey_4 = substr($pubkey, 0, 4); + return "$room_token+$pubkey_4"; + } + /** * Extracts join links that match $REGEX_JOIN_LINK. * @param ?string $html Text to find join URLs in. diff --git a/sites/+components/tbl_communities.php b/sites/+components/tbl_communities.php index 8fab844..8c3b6e3 100644 --- a/sites/+components/tbl_communities.php +++ b/sites/+components/tbl_communities.php @@ -75,23 +75,23 @@ get_room_tags() as $tag): ?> text, 16) ?>