Add CommunityTag & parse ASGL tags

dev
gravel 2 years ago
parent a8f0464149
commit bfa64118fa
Signed by: gravel
GPG Key ID: C0538F3C906B308F

@ -70,10 +70,13 @@ html:not(.js) .js-only {
margin-inline: 0.2em; margin-inline: 0.2em;
border: #0004 outset 1px; border: #0004 outset 1px;
/* box-shadow: 0.05em 0.05em 0.1em 0 #4444;*/ /* box-shadow: 0.05em 0.05em 0.1em 0 #4444;*/
}
.room-label-user {
background-color: greenyellow; background-color: greenyellow;
} }
.badge-special { .room-label-reserved {
background-color: yellow; background-color: yellow;
} }

@ -2,6 +2,7 @@
include_once "$PROJECT_ROOT/languages/language_flags.php"; include_once "$PROJECT_ROOT/languages/language_flags.php";
include_once "$PROJECT_ROOT/php/servers/known-servers.php"; include_once "$PROJECT_ROOT/php/servers/known-servers.php";
include_once 'tags.php';
$MINUTE_SECONDS = 60; $MINUTE_SECONDS = 60;
$HOUR_SECONDS = 60 * $MINUTE_SECONDS; $HOUR_SECONDS = 60 * $MINUTE_SECONDS;
@ -74,7 +75,7 @@
* *
* Custom attribute. * Custom attribute.
*/ */
public array $tags; private array $tags = [];
private function __construct(\CommunityServer $server, array $details) { private function __construct(\CommunityServer $server, array $details) {
global $languages; global $languages;
@ -97,11 +98,6 @@
$room_identifier = $this->get_room_identifier(); $room_identifier = $this->get_room_identifier();
/** @var string[] */
$this->tags = isset($details['tags'])
? $details['tags']
: [];
$this->language_flag = $this->language_flag =
isset($languages[$room_identifier]) isset($languages[$room_identifier])
? $languages[$room_identifier] ? $languages[$room_identifier]
@ -122,7 +118,12 @@
* @param CommunityServer $server * @param CommunityServer $server
*/ */
public static function from_details($server, array $details) { public static function from_details($server, array $details) {
return new CommunityRoom($server, $details); $room = new CommunityRoom($server, $details);
$has_tags = isset($details['tags']);
if ($has_tags) {
$room->tags = CommunityTag::from_details_array($details['tags']);
}
return $room;
} }
/** /**
@ -203,7 +204,6 @@
} }
return floor($active_users_cutoff) . 's'; return floor($active_users_cutoff) . 's';
} }
/** /**
@ -260,41 +260,33 @@
} }
/** /**
* @var string[] RESERVED_TAGS * Checks whether this room belongs to a Session-owned server.
* Array of derived tags unavailable for manual tagging.
*/
const RESERVED_TAGS = ["official"];
/**
* Checks whether the given manual tag can be accepted.
*/ */
static function is_reserved_tag(string $tag): bool { function is_official_room(): bool {
return in_array(strtolower($tag), CommunityRoom::RESERVED_TAGS); return $this->server->is_official_server();
} }
/** /**
* Checks whether this room belongs to a Session-owned server. * @param string[] $tags
*/ */
function is_official_room(): bool { function add_tags(array $tags) {
return $this->server->is_official_server(); $this->tags = [...$this->tags, ...$tags];
} }
/** /**
* Return the tags associated with this room. * Return the tags associated with this room.
* @return string[] Tags as string array. * @return \CommunityTag[] Tags as string array.
*/ */
function get_room_tags(): array { function get_room_tags(): array {
$user_tags = array_filter( $user_tags = CommunityTag::from_user_tags($this->tags);
$this->tags,
function($tag) {
return !CommunityRoom::is_reserved_tag($tag);
}
);
/**
* @var \CommunityTag[] $derived_tags
*/
$derived_tags = []; $derived_tags = [];
if ($this->is_official_room()) { if ($this->is_official_room()) {
$derived_tags[] = "official"; $derived_tags[] = new CommunityTag("official", TagType::RESERVED_TAG);
} }
return [...$derived_tags, ...$user_tags]; return [...$derived_tags, ...$user_tags];
@ -586,7 +578,7 @@
foreach ($servers as $server) { foreach ($servers as $server) {
foreach ($server->rooms as $room) { foreach ($server->rooms as $room) {
$sourced_tags = $source->get_room_tags($room->get_room_identifier()); $sourced_tags = $source->get_room_tags($room->get_room_identifier());
$room->tags = [...$room->tags, ...$sourced_tags]; $room->add_tags($sourced_tags);
} }
} }
} }
@ -793,7 +785,6 @@
// Check reachability before polling too much. // Check reachability before polling too much.
if (count($this->room_hints) >= 2) { if (count($this->room_hints) >= 2) {
log_info("Checking reachability for $base_url first..."); log_info("Checking reachability for $base_url first...");
log_value($this->room_hints);
if (!url_is_reachable($base_url, retries: $FAST_FETCH_MODE ? 1 : 4)) { if (!url_is_reachable($base_url, retries: $FAST_FETCH_MODE ? 1 : 4)) {
log_warning("Reachability test failed by $base_url."); log_warning("Reachability test failed by $base_url.");
return false; return false;

@ -1,8 +1,194 @@
<?php <?php
require_once 'utils.php'; require_once 'utils.php';
require_once 'tags.php';
class CommunitySources { class SDIRCommunitySource {
/**
 * Stores the raw source text for later parsing.
 * Private: instances are created through from_contents().
 * @param string $contents Raw text of the source.
 */
private function __construct(string $contents) {
$this->contents = $contents;
}
/**
 * Builds a source object from raw contents and parses its tags right away.
 * @param string $contents Raw text of the source.
 * @return \SDIRCommunitySource|false The parsed source, or false when tag processing fails.
 */
public static function from_contents(string $contents) {
    $candidate = new SDIRCommunitySource($contents);
    return $candidate->sdir_process_tags() ? $candidate : false;
}
private readonly string $contents;
/**
* @var string[][] $tags Array associating room IDs with string tag arrays.
*/
private array $tags;
/**
 * Checks that a room entry carries string 'url' and 'tags' fields.
 *
 * The first failing field (url is checked before tags) is logged via
 * log_value() and recorded in the matching by-reference flag.
 *
 * @param array $room_entry Decoded room entry.
 * @param bool $missing_url Set to true when 'url' is absent or not a string.
 * @param bool $missing_tags Set to true when 'url' is fine but 'tags' is absent or not a string.
 * @return bool True when both fields are present strings.
 */
private static function sdir_validate_entry(
    array $room_entry,
    bool &$missing_url,
    bool &$missing_tags
): bool {
    $url_ok = is_string($room_entry['url'] ?? null);
    $tags_ok = $url_ok && is_string($room_entry['tags'] ?? null);

    if ($tags_ok) {
        return true;
    }

    log_value($room_entry);
    if ($url_ok) {
        $missing_tags = true;
    } else {
        $missing_url = true;
    }
    return false;
}
/**
 * Logs one summary error per kind of missing field.
 * @param bool $entry_missing_url Whether any entry lacked a usable 'url'.
 * @param bool $entry_missing_tags Whether any entry lacked usable 'tags'.
 */
private static function sdir_report_errors(bool $entry_missing_url, bool $entry_missing_tags) {
    $reports = [
        [$entry_missing_url, "One or more room entries from session.directory is missing the 'url' parameter."],
        [$entry_missing_tags, "One or more room entries from session.directory is missing the 'tags' parameter."],
    ];

    foreach ($reports as [$was_missing, $message]) {
        if ($was_missing) {
            log_error($message);
        }
    }
}
/**
 * Decodes the stored contents as JSON.
 * @return array|bool The decoded array, or false when the contents are not
 * valid JSON or the top-level JSON value is not an array/object.
 */
private function get_sdir_entries(): array|bool {
    try {
        $entries = json_decode($this->contents, true, 512, JSON_THROW_ON_ERROR);
    } catch (JsonException) {
        return false;
    }

    // Valid JSON may still be a bare scalar (e.g. `true` or `"text"`);
    // callers iterate over the result, so treat that as a failure too.
    return is_array($entries) ? $entries : false;
}
/**
 * Parses the stored JSON contents and records the raw tags of each room.
 *
 * Each usable entry contributes its comma-separated 'tags' string, split
 * into an array and stored under the room ID computed from its 'url'.
 * Unusable entries are skipped and summarized in the log afterwards.
 *
 * @return bool False when no entries could be decoded, true otherwise.
 */
private function sdir_process_tags(): bool {
    $entry_missing_url = false;
    $entry_missing_tags = false;
    $entry_malformed = false;

    $rooms = $this->get_sdir_entries();

    if (!is_array($rooms) || count($rooms) === 0) {
        log_error("Could not parse entries from session.directory.");
        return false;
    }

    $tags_by_room = [];

    foreach ($rooms as $room_entry) {
        // Entries come from an external service; a non-array entry would
        // otherwise raise a TypeError in sdir_validate_entry().
        if (!is_array($room_entry)) {
            $entry_malformed = true;
            continue;
        }

        if (!SDIRCommunitySource::sdir_validate_entry(
            $room_entry, $entry_missing_url, $entry_missing_tags
        )) {
            continue;
        }

        $room_id = url_get_room_id($room_entry['url']);
        $tags_by_room[$room_id] = explode(',', $room_entry['tags']);
    }

    // Assign unconditionally so the property is initialized even when every
    // entry was skipped.
    $this->tags = $tags_by_room;

    if ($entry_malformed) {
        log_error("One or more room entries from session.directory is not an object.");
    }
    SDIRCommunitySource::sdir_report_errors($entry_missing_url, $entry_missing_tags);

    return true;
}
/**
 * @return string[][] Array associating room IDs with string tag arrays;
 * empty when no tags were recorded.
 */
public function get_tags(): array {
    // $tags is declared without a default, so it stays uninitialized until
    // the first entry is stored; `??` reads it without raising an Error.
    return $this->tags ?? [];
}
}
/**
 * Extracts per-room tags from line-based text in which a line consisting of
 * a single join link may be followed by a "hashtag" line listing tags as
 * backtick-quoted `#tag` items.
 */
class ASGLCommunitySource {
    private readonly string $contents;

    /**
     * @var string[][] $tags Tag arrays keyed by room ID.
     */
    private array $tags = [];

    private function __construct(string $contents) {
        $this->contents = $contents;
    }

    /**
     * Builds a source from raw contents and parses its tags right away.
     * @return \ASGLCommunitySource|false
     */
    public static function from_contents(string $contents) {
        $candidate = new ASGLCommunitySource($contents);
        return $candidate->asgl_process_tags() ? $candidate : false;
    }

    /**
     * Walks the contents line by line, storing a tag list as soon as both a
     * room ID and a non-empty tag list are known.
     * @return bool Always true.
     */
    private function asgl_process_tags(): bool {
        $current_room_id = null;
        $pending_tags = [];

        foreach (explode("\n", $this->contents) as $line) {
            ASGLCommunitySource::asgl_process_line($line, $current_room_id, $pending_tags);

            if ($current_room_id == null || count($pending_tags) === 0) {
                continue;
            }

            $this->tags[$current_room_id] = $pending_tags;
            $current_room_id = null;
            $pending_tags = [];
        }

        return true;
    }

    /**
     * Updates the parser state for one line.
     *
     * - A blank line forgets the current room.
     * - A line that is exactly one join link starts a new room and clears
     *   the collected tags.
     * - A line starting with "hashtag" replaces the collected tags.
     *
     * @param ?string $line Line to inspect.
     * @param ?string $last_room_id Room ID state, updated in place.
     * @param array $room_tags Collected tags, updated in place.
     */
    private static function asgl_process_line(
        ?string $line,
        ?string &$last_room_id,
        array &$room_tags
    ) {
        $line = trim($line);

        if ($line === '') {
            $last_room_id = null;
            return;
        }

        $links = parse_join_links($line);
        $is_room_line = count($links) == 1 && $links[0] == $line;

        if ($is_room_line) {
            $last_room_id = url_get_room_id($links[0]);
            $room_tags = [];
        } elseif (str_starts_with($line, "hashtag")) {
            $room_tags = ASGLCommunitySource::read_asgl_tags($line);
        }
    }

    /**
     * Collects the text of every backtick-quoted `#tag` item on the line.
     * @return string[] Tag texts without the leading '#'.
     */
    private static function read_asgl_tags(string $line): array {
        preg_match_all('/`#([^`]+)`/', $line, $found);
        // Index 1 holds the matches of the first capture group.
        return $found[1];
    }

    /**
     * @return string[][] Tag arrays keyed by room ID.
     */
    public function get_tags(): array {
        return $this->tags;
    }
}
class CommunitySources {
private const SOURCES = array( private const SOURCES = array(
'ASGL' => 'https://raw.githubusercontent.com/GNU-Linux-libre/Awesome-Session-Group-List/main/README.md', 'ASGL' => 'https://raw.githubusercontent.com/GNU-Linux-libre/Awesome-Session-Group-List/main/README.md',
'LOKI' => 'https://lokilocker.com/Mods/Session-Groups/wiki/Session-Open-Groups', 'LOKI' => 'https://lokilocker.com/Mods/Session-Groups/wiki/Session-Open-Groups',
@ -13,109 +199,93 @@
'FARK' => 'https://freearkham.cc/' 'FARK' => 'https://freearkham.cc/'
); );
private string $contents_aggregated = ""; private readonly string $contents_asgl;
private readonly string $contents_loki;
private string $contents_sdir = ""; private readonly string $contents_sdir;
private readonly string $contents_fark;
private readonly string $contents_aggregated;
private array $room_tags; /**
* Array associating room identifiers with arrays of raw tags.
* @var array<string,string[]> $room_tags
*/
private array $room_tags = [];
/** /**
* Fetches and saves known sources of Session Community join links. * Fetches and saves known sources of Session Community join links.
*/ */
public function __construct() { public function __construct() {
global $SOURCES;
log_info("Requesting Awesome Session Group list..."); log_info("Requesting Awesome Session Group list...");
$contents_asgl = file_get_contents(CommunitySources::SOURCES['ASGL']); $this->contents_asgl = CommunitySources::fetch_source('ASGL');
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
log_info("Requesting Lokilocker Mods Open Group list..."); log_info("Requesting Lokilocker Mods Open Group list...");
$contents_loki = file_get_contents(CommunitySources::SOURCES['LOKI']); $this->contents_loki = CommunitySources::fetch_source('LOKI');
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
log_info("Requesting session.directory list..."); log_info("Requesting session.directory list...");
$this->contents_sdir = file_get_contents(CommunitySources::SOURCES['SDIR-JSON']); $this->contents_sdir = CommunitySources::fetch_source('SDIR-JSON');
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
if (!$this->sdir_process_tags()) {
log_debug(truncate($this->contents_sdir, 50));
log_error("Could not parse tags from session.directory.");
}
log_info("Requesting FreeArkham.cc list..."); log_info("Requesting FreeArkham.cc list...");
$contents_fark = file_get_contents(CommunitySources::SOURCES['FARK']); $this->contents_fark = CommunitySources::fetch_source('FARK');
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
log_info('Done fetching sources.'); log_info("Parsing extra information...");
if (!$this->process_sources()) {
log_error("Could not parse extra information from one or more sources.");
}
log_info('Done fetching sources.');
$this->contents_aggregated = $contents_asgl . $contents_fark . $contents_loki . $this->contents_sdir; $this->contents_aggregated =
$this->contents_asgl .
$this->contents_fark .
$this->contents_loki .
$this->contents_sdir;
} }
private static function preprocess_tag(?string $tag) { private static function fetch_source(string $source_key) {
$tag = trim($tag); $url = CommunitySources::SOURCES[$source_key];
if (strlen($tag) == 0) {
return $tag;
}
$tag = html_sanitize(html_entity_decode($tag)); $contents = file_get_contents($url);
log_debug($http_response_header[0]);
if ($tag[0] == '#') { if (!$contents) {
return substr($tag, 1); log_error("Could not fetch source from $url.");
return "";
} }
return $tag; return $contents;
} }
private function sdir_process_tags(): bool { /**
$entry_missing_url = false; * @param string[][] $tags Array associating room IDs to tag arrays
$entry_missing_tags = false; */
private function add_tags(array $tags) {
try { foreach ($tags as $room_id => $room_tags) {
$rooms = json_decode($this->contents_sdir, true, 512, JSON_THROW_ON_ERROR); if (!isset($this->room_tags[$room_id])) {
} catch (JsonException) { $this->room_tags[$room_id] = [];
return false;
}
foreach ($rooms as $room_entry) {
// TODO: Check types
if (!isset($room_entry['url'])) {
log_value($room_entry);
$entry_missing_url = true;
continue;
}
if (!isset($room_entry['tags'])) {
log_value($room_entry);
$entry_missing_tags = true;
continue;
} }
$url = $room_entry['url'];
$tags = explode(',', $room_entry['tags']);
$room_token = url_get_token($url);
$pubkey = url_get_pubkey($url);
$pubkey_4 = substr($pubkey, 0, 4);
$room_id = "$room_token+$pubkey_4";
$tags = array_map(function(?string $tag) { $this->room_tags[$room_id] = [
return CommunitySources::preprocess_tag($tag); ...$this->room_tags[$room_id],
}, $tags); ...$room_tags
];
}
}
$tags = array_filter( private function process_sources(): bool {
$tags, function(?string $tag) { $source_sdir = SDIRCommunitySource::from_contents($this->contents_sdir);
return strlen($tag) != 0;
} $source_asgl = ASGLCommunitySource::from_contents($this->contents_asgl);
);
$this->room_tags[$room_id] = $tags; $source_sdir && $this->add_tags($source_sdir->get_tags());
}
if ($entry_missing_url) { $source_asgl && $this->add_tags($source_asgl->get_tags());
log_error("One or more room entries from session.directory is missing the 'url' parameter.");
if (!$source_sdir) {
return false;
} }
if ($entry_missing_tags) { if (!$source_asgl) {
log_error("One or more room entries from session.directory is missing the 'tags' parameter."); return false;
} }
return true; return true;
@ -125,7 +295,7 @@
* Returns all join URLs found. * Returns all join URLs found.
* @return string[] Join URLs. * @return string[] Join URLs.
*/ */
function get_join_urls(): array { public function get_join_urls(): array {
return array_unique( return array_unique(
parse_join_links($this->contents_aggregated) parse_join_links($this->contents_aggregated)
); );
@ -134,14 +304,14 @@
/** /**
* Return known tags for the given room. * Return known tags for the given room.
* @param string $room_id Room identifier. * @param string $room_id Room identifier.
* @return string[] Array of string tags. * @return \CommunityTag[] Array of string tags.
*/ */
function get_room_tags($room_id): array { public function get_room_tags($room_id): array {
if (!isset($this->room_tags[$room_id])) { if (!isset($this->room_tags[$room_id])) {
return []; return [];
} }
return array_slice($this->room_tags[$room_id], 0); return $this->room_tags[$room_id];
} }
} }
?> ?>

@ -0,0 +1,124 @@
<?php
require_once 'utils.php';
/**
 * Integer constants naming the categories a CommunityTag can belong to.
 */
class TagType {
// Private constructor: this class only carries constants.
private function __construct() {}
// Default category used by CommunityTag's constructor.
const USER_TAG = 0;
// Category CommunityTag::get_tag_type() reports as 'reserved'.
const RESERVED_TAG = 1;
// Category CommunityTag::get_tag_type() reports as 'warning'.
const WARNING_TAG = 2;
}
/**
 * A single room tag: its display text plus a TagType category.
 */
class CommunityTag implements JsonSerializable {
    /**
     * @param string $text Display text of the tag.
     * @param int $tag_type One of the TagType constants.
     */
    public function __construct(string $text, int $tag_type = TagType::USER_TAG) {
        $this->text = $text;
        $this->type = $tag_type;
    }

    public readonly int $type;

    public readonly string $text;

    /**
     * Returns a lowercase representation of the tag for purposes of de-duping.
     */
    public function __toString(): string {
        return strtolower($this->text);
    }

    /**
     * Serializes a user tag as its plain text.
     * @throws LogicException When the tag is not a user tag.
     */
    public function jsonSerialize(): mixed {
        if ($this->type != TagType::USER_TAG) {
            throw new LogicException("Should not serialize derived tags.");
        }
        return $this->text;
    }

    /**
     * Normalizes one raw tag: trims it, decodes and re-sanitizes HTML, and
     * strips a single leading '#'.
     * @return string Normalized tag text; may be empty.
     */
    private static function preprocess_tag(?string $tag): string {
        $tag = trim($tag ?? '');
        if ($tag === '') {
            return '';
        }

        $tag = (string) html_sanitize(html_entity_decode($tag));

        // str_starts_with() is safe even if sanitizing left an empty string.
        if (str_starts_with($tag, '#')) {
            $tag = trim(substr($tag, 1));
        }

        return $tag;
    }

    /**
     * Normalizes every given tag and drops the ones that end up empty.
     * @param array $tag_array Raw tag strings or existing CommunityTag objects.
     * @return string[] List of normalized, non-empty tag texts.
     */
    private static function normalize_tags(array $tag_array): array {
        $normalized = [];
        foreach ($tag_array as $tag) {
            // Read existing tag objects through ->text: casting them to
            // string would lowercase the display text.
            $raw = $tag instanceof CommunityTag ? $tag->text : $tag;
            $text = CommunityTag::preprocess_tag($raw);
            if ($text !== '') {
                $normalized[] = $text;
            }
        }
        return $normalized;
    }

    /**
     * Returns the user tags given, without any reserved tags.
     *
     * Tags are normalized before the reserved check so that variants such
     * as " #Official " cannot slip past it.
     *
     * @param string[] $tags
     * @return \CommunityTag[] List of user tags with sequential keys.
     */
    public static function from_user_tags(array $tags): array {
        $allowed = array_filter(
            CommunityTag::normalize_tags($tags),
            static fn(string $tag): bool => !CommunityTag::is_reserved_tag($tag)
        );

        // array_filter() preserves keys; re-index so the result stays a list.
        return array_map(
            static fn(string $tag): CommunityTag => new CommunityTag($tag),
            array_values($allowed)
        );
    }

    /**
     * @param string[] $details_array Array of string tags.
     * @return \CommunityTag[]
     */
    public static function from_details_array(array $details_array): array {
        return CommunityTag::from_user_tags($details_array);
    }

    /**
     * Removes tags whose lowercase text duplicates an earlier tag.
     * @param \CommunityTag[] $tags
     * @return \CommunityTag[] List of unique tags with sequential keys.
     */
    public static function dedupe_tags(array $tags) {
        // array_unique() compares the string casts, i.e. __toString().
        return array_values(array_unique($tags));
    }

    /**
     * @return string Name of this tag's category: 'user', 'reserved' or 'warning'.
     */
    public function get_tag_type(): string {
        return match($this->type) {
            TagType::USER_TAG => 'user',
            TagType::RESERVED_TAG => 'reserved',
            TagType::WARNING_TAG => 'warning'
        };
    }

    /**
     * @var string[] RESERVED_TAGS
     * Array of derived tags unavailable for manual tagging.
     */
    private const RESERVED_TAGS = ["official"];

    /**
     * Checks whether the given manual tag is reserved (case-insensitive).
     */
    public static function is_reserved_tag(string $tag): bool {
        return in_array(strtolower($tag), CommunityTag::RESERVED_TAGS, true);
    }
}
?>

@ -122,9 +122,10 @@
* to an unreachable host. * to an unreachable host.
*/ */
function curl_get_response(string $url, int $retries, $stop_on_codes = [404], $curlopts = []) { function curl_get_response(string $url, int $retries, $stop_on_codes = [404], $curlopts = []) {
global $FAST_FETCH_MODE;
// use separate timeouts to reliably get data from Chinese server with repeated tries // use separate timeouts to reliably get data from Chinese server with repeated tries
$connecttimeout = 2; // wait at most X seconds to connect $connecttimeout = 2; // wait at most X seconds to connect
$timeout = 3; // can't take longer than X seconds for the whole curl process $timeout = $FAST_FETCH_MODE ? 1.5 : 3; // can't take longer than X seconds for the whole curl process
$sleep = 2; // sleep between tries in seconds $sleep = 2; // sleep between tries in seconds
// takes at most ($timeout + $sleep) * $retries seconds // takes at most ($timeout + $sleep) * $retries seconds
@ -200,6 +201,18 @@
return $query_components['public_key']; return $query_components['public_key'];
} }
/**
 * Builds a room identifier from a join URL: the room token, a '+', and
 * the first four characters of the server public key.
 * @param string $join_url Join URL for Session Community.
 * @return string Room identifier per our format.
 */
function url_get_room_id(string $join_url) {
    $token = url_get_token($join_url);
    $pubkey_prefix = substr(url_get_pubkey($join_url), 0, 4);
    return $token . '+' . $pubkey_prefix;
}
/** /**
* Extracts join links that match $REGEX_JOIN_LINK. * Extracts join links that match $REGEX_JOIN_LINK.
* @param ?string $html Text to find join URLs in. * @param ?string $html Text to find join URLs in.

@ -75,23 +75,23 @@
<td class="td_name" <td class="td_name"
title="Click here to preview '<?=$name?>'" title="Click here to preview '<?=$name?>'"
data-sort-by="<?=strtolower($name)?>" data-sort-by="<?=strtolower($name)?>"
itemprop="name"
> >
<a <a
href="<?=$preview_link?>" href="<?=$preview_link?>"
target="_blank" target="_blank"
title="'<?=$name?>' preview" title="'<?=$name?>' preview"
rel="noopener noreferrer" rel="noopener noreferrer"
itemprop="name"
><?= ><?=
$name $name
?></a> ?></a>
<span class="room-labels-container"> <span class="room-labels-container">
<?php foreach ($room->get_room_tags() as $tag): ?> <?php foreach ($room->get_room_tags() as $tag): ?>
<span <span
class="room-label badge<?=CommunityRoom::is_reserved_tag($tag) ? ' badge-special' : ''?>" class="room-label room-label-<?=$tag->get_tag_type()?> badge"
title="<?=$tag?>" title="<?=$tag?>"
><?= ><?=
truncate($tag, 16) truncate($tag->text, 16)
?></span> ?></span>
<?php endforeach; ?> <?php endforeach; ?>
</span> </span>

Loading…
Cancel
Save