Add CommunityTag & parse ASGL tags

dev
gravel 2 years ago
parent a8f0464149
commit bfa64118fa
Signed by: gravel
GPG Key ID: C0538F3C906B308F

@ -70,10 +70,13 @@ html:not(.js) .js-only {
margin-inline: 0.2em;
border: #0004 outset 1px;
/* box-shadow: 0.05em 0.05em 0.1em 0 #4444;*/
}
.room-label-user {
background-color: greenyellow;
}
.badge-special {
.room-label-reserved {
background-color: yellow;
}

@ -2,6 +2,7 @@
include_once "$PROJECT_ROOT/languages/language_flags.php";
include_once "$PROJECT_ROOT/php/servers/known-servers.php";
include_once 'tags.php';
$MINUTE_SECONDS = 60;
$HOUR_SECONDS = 60 * $MINUTE_SECONDS;
@ -74,7 +75,7 @@
*
* Custom attribute.
*/
public array $tags;
private array $tags = [];
private function __construct(\CommunityServer $server, array $details) {
global $languages;
@ -97,11 +98,6 @@
$room_identifier = $this->get_room_identifier();
/** @var string[] */
$this->tags = isset($details['tags'])
? $details['tags']
: [];
$this->language_flag =
isset($languages[$room_identifier])
? $languages[$room_identifier]
@ -122,7 +118,12 @@
* @param CommunityServer $server
*/
public static function from_details($server, array $details) {
return new CommunityRoom($server, $details);
$room = new CommunityRoom($server, $details);
$has_tags = isset($details['tags']);
if ($has_tags) {
$room->tags = CommunityTag::from_details_array($details['tags']);
}
return $room;
}
/**
@ -203,7 +204,6 @@
}
return floor($active_users_cutoff) . 's';
}
/**
@ -260,41 +260,33 @@
}
/**
* @var string[] RESERVED_TAGS
* Array of derived tags unavailable for manual tagging.
*/
const RESERVED_TAGS = ["official"];
/**
* Checks whether the given manual tag can be accepted.
* Checks whether this room belongs to a Session-owned server.
*/
static function is_reserved_tag(string $tag): bool {
return in_array(strtolower($tag), CommunityRoom::RESERVED_TAGS);
function is_official_room(): bool {
return $this->server->is_official_server();
}
/**
* Checks whether this room belongs to a Session-owned server.
* @param string[] $tags
*/
function is_official_room(): bool {
return $this->server->is_official_server();
function add_tags(array $tags) {
$this->tags = [...$this->tags, ...$tags];
}
/**
* Return the tags associated with this room.
* @return string[] Tags as string array.
* @return \CommunityTag[] Derived tags followed by user tags, as CommunityTag objects.
*/
function get_room_tags(): array {
$user_tags = array_filter(
$this->tags,
function($tag) {
return !CommunityRoom::is_reserved_tag($tag);
}
);
$user_tags = CommunityTag::from_user_tags($this->tags);
/**
* @var \CommunityTag[] $derived_tags
*/
$derived_tags = [];
if ($this->is_official_room()) {
$derived_tags[] = "official";
$derived_tags[] = new CommunityTag("official", TagType::RESERVED_TAG);
}
return [...$derived_tags, ...$user_tags];
@ -586,7 +578,7 @@
foreach ($servers as $server) {
foreach ($server->rooms as $room) {
$sourced_tags = $source->get_room_tags($room->get_room_identifier());
$room->tags = [...$room->tags, ...$sourced_tags];
$room->add_tags($sourced_tags);
}
}
}
@ -793,7 +785,6 @@
// Check reachability before polling too much.
if (count($this->room_hints) >= 2) {
log_info("Checking reachability for $base_url first...");
log_value($this->room_hints);
if (!url_is_reachable($base_url, retries: $FAST_FETCH_MODE ? 1 : 4)) {
log_warning("Reachability test failed by $base_url.");
return false;

@ -1,8 +1,194 @@
<?php
require_once 'utils.php';
require_once 'tags.php';
class CommunitySources {
/**
 * Tag source backed by the session.directory JSON listing.
 *
 * Expects a JSON array of room entries, each carrying a string 'url' and a
 * comma-separated string 'tags', and turns it into a map from room ID to raw
 * string tags.
 */
class SDIRCommunitySource {
    /**
     * Raw JSON text this source was created from.
     */
    private readonly string $contents;

    /**
     * Initialized to an empty array so that get_tags() is safe even when no
     * entry passed validation.
     * @var string[][] $tags Array associating room IDs with string tag arrays.
     */
    private array $tags = [];

    private function __construct(string $contents) {
        $this->contents = $contents;
    }

    /**
     * Create new instance of this source from contents.
     * Returns false if processing the source fails.
     * @param string $contents Raw JSON text of the listing.
     * @return \SDIRCommunitySource|false
     */
    public static function from_contents(string $contents) {
        $source = new SDIRCommunitySource($contents);

        if (!$source->sdir_process_tags()) {
            return false;
        }

        return $source;
    }

    /**
     * Checks that a room entry has string 'url' and 'tags' values.
     * Logs the offending entry and raises the matching flag on failure.
     * @param array $room_entry Decoded room entry.
     * @param bool $missing_url Set to true when 'url' is absent or not a string.
     * @param bool $missing_tags Set to true when 'tags' is absent or not a string.
     * @return bool True if the entry can be processed.
     */
    private static function sdir_validate_entry(
        array $room_entry,
        bool &$missing_url,
        bool &$missing_tags
    ): bool {
        if (!isset($room_entry['url']) || !is_string($room_entry['url'])) {
            log_value($room_entry);
            $missing_url = true;
            return false;
        }

        if (!isset($room_entry['tags']) || !is_string($room_entry['tags'])) {
            log_value($room_entry);
            $missing_tags = true;
            return false;
        }

        return true;
    }

    /**
     * Logs one summary error per kind of validation failure encountered.
     */
    private static function sdir_report_errors(bool $entry_missing_url, bool $entry_missing_tags) {
        if ($entry_missing_url) {
            log_error("One or more room entries from session.directory is missing the 'url' parameter.");
        }

        if ($entry_missing_tags) {
            log_error("One or more room entries from session.directory is missing the 'tags' parameter.");
        }
    }

    /**
     * Decodes the stored contents as JSON.
     * @return array|bool Decoded entries, or false when the contents are not
     * valid JSON or do not decode to an array.
     */
    private function get_sdir_entries(): array|bool {
        try {
            $entries = json_decode($this->contents, true, 512, JSON_THROW_ON_ERROR);
        } catch (JsonException) {
            return false;
        }

        // Valid JSON may still be a scalar or null; only an array is iterable here.
        return is_array($entries) ? $entries : false;
    }

    /**
     * Parses all room entries and records their tags by room ID.
     * @return bool False if the listing could not be parsed or is empty.
     */
    private function sdir_process_tags(): bool {
        $entry_missing_url = false;
        $entry_missing_tags = false;

        $rooms = $this->get_sdir_entries();

        // An empty listing is treated the same as an unparseable one.
        if (!$rooms) {
            log_error("Could not parse entries from session.directory.");
            return false;
        }

        foreach ($rooms as $room_entry) {
            // A non-array entry cannot carry a 'url'; report it as such
            // instead of failing the type check of the validator.
            if (!is_array($room_entry)) {
                log_value($room_entry);
                $entry_missing_url = true;
                continue;
            }

            if (!SDIRCommunitySource::sdir_validate_entry(
                $room_entry, $entry_missing_url, $entry_missing_tags
            )) {
                continue;
            }

            $url = $room_entry['url'];
            $tags = $room_entry['tags'];

            $room_id = url_get_room_id($url);

            $this->tags[$room_id] = explode(',', $tags);
        }

        SDIRCommunitySource::sdir_report_errors($entry_missing_url, $entry_missing_tags);

        return true;
    }

    /**
     * @return string[][] Array associating room IDs with string tag arrays.
     */
    public function get_tags(): array {
        return $this->tags;
    }
}
/**
 * Tag source backed by the Awesome Session Group List text.
 *
 * Scans the text line by line: a line consisting solely of one join link
 * selects a room, and a following line starting with "hashtag" supplies that
 * room's tags as backtick-quoted `#tag` items.
 */
class ASGLCommunitySource {
    /**
     * Raw text this source was created from.
     */
    private readonly string $contents;

    /**
     * @var string[][] $tags Tags found so far, keyed by room ID.
     */
    private array $tags = [];

    private function __construct(string $contents) {
        $this->contents = $contents;
    }

    /**
     * Create a new instance of this source from contents.
     * @param string $contents Raw text of the list.
     * @return \ASGLCommunitySource|false The source, or false if processing
     * reports failure.
     */
    public static function from_contents(string $contents) {
        $instance = new ASGLCommunitySource($contents);

        return $instance->asgl_process_tags() ? $instance : false;
    }

    /**
     * Walks all lines and stores tags once both a room and its tags are known.
     * @return bool Always true.
     */
    private function asgl_process_tags(): bool {
        $current_room_id = null;
        $pending_tags = [];

        foreach (explode("\n", $this->contents) as $raw_line) {
            ASGLCommunitySource::asgl_process_line($raw_line, $current_room_id, $pending_tags);

            // Nothing to store until a room and a non-empty tag list coincide.
            if ($current_room_id == null || count($pending_tags) === 0) {
                continue;
            }

            $this->tags[$current_room_id] = $pending_tags;
            $current_room_id = null;
            $pending_tags = [];
        }

        return true;
    }

    /**
     * Updates the parser state from a single line.
     * @param ?string $line Line of text to inspect.
     * @param ?string $last_room_id Room selected by the latest link line;
     * cleared by a blank line.
     * @param array $room_tags Tags read from the latest "hashtag" line;
     * cleared by a link line.
     */
    private static function asgl_process_line(
        ?string $line,
        ?string &$last_room_id,
        array &$room_tags
    ) {
        $text = trim($line);

        // A blank line ends the current room.
        if ($text === '') {
            $last_room_id = null;
            return;
        }

        $links = parse_join_links($text);
        $is_bare_link = count($links) == 1 && $links[0] == $text;

        if ($is_bare_link) {
            $last_room_id = url_get_room_id($links[0]);
            $room_tags = [];
            return;
        }

        if (str_starts_with($text, "hashtag")) {
            $room_tags = ASGLCommunitySource::read_asgl_tags($text);
        }
    }

    /**
     * Extracts backtick-quoted `#tag` items from a line.
     * @return string[] Tag texts without the leading '#'.
     */
    private static function read_asgl_tags(string $line): array {
        $found = [];
        preg_match_all('/`#([^`]+)`/', $line, $found);

        // Index 1 holds the captures of the first group.
        return $found[1];
    }

    /**
     * @return string[][] Array associating room IDs with string tag arrays.
     */
    public function get_tags(): array {
        return $this->tags;
    }
}
class CommunitySources {
private const SOURCES = array(
'ASGL' => 'https://raw.githubusercontent.com/GNU-Linux-libre/Awesome-Session-Group-List/main/README.md',
'LOKI' => 'https://lokilocker.com/Mods/Session-Groups/wiki/Session-Open-Groups',
@ -13,109 +199,93 @@
'FARK' => 'https://freearkham.cc/'
);
private string $contents_aggregated = "";
private readonly string $contents_asgl;
private readonly string $contents_loki;
private readonly string $contents_sdir;
private readonly string $contents_fark;
private readonly string $contents_aggregated;
private string $contents_sdir = "";
private array $room_tags;
/**
* Array associating room identifiers with arrays of raw tags.
* @var array<string,string[]> $room_tags
*/
private array $room_tags = [];
/**
* Fetches and saves known sources of Session Community join links.
*/
public function __construct() {
global $SOURCES;
log_info("Requesting Awesome Session Group list...");
$contents_asgl = file_get_contents(CommunitySources::SOURCES['ASGL']);
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
$this->contents_asgl = CommunitySources::fetch_source('ASGL');
log_info("Requesting Lokilocker Mods Open Group list...");
$contents_loki = file_get_contents(CommunitySources::SOURCES['LOKI']);
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
$this->contents_loki = CommunitySources::fetch_source('LOKI');
log_info("Requesting session.directory list...");
$this->contents_sdir = file_get_contents(CommunitySources::SOURCES['SDIR-JSON']);
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
if (!$this->sdir_process_tags()) {
log_debug(truncate($this->contents_sdir, 50));
log_error("Could not parse tags from session.directory.");
}
$this->contents_sdir = CommunitySources::fetch_source('SDIR-JSON');
log_info("Requesting FreeArkham.cc list...");
$contents_fark = file_get_contents(CommunitySources::SOURCES['FARK']);
log_debug($http_response_header[0]); // Supposed to be "HTTP/1.1 200 OK"
$this->contents_fark = CommunitySources::fetch_source('FARK');
log_info('Done fetching sources.');
log_info("Parsing extra information...");
if (!$this->process_sources()) {
log_error("Could not parse extra information from one or more sources.");
}
log_info('Done fetching sources.');
$this->contents_aggregated = $contents_asgl . $contents_fark . $contents_loki . $this->contents_sdir;
$this->contents_aggregated =
$this->contents_asgl .
$this->contents_fark .
$this->contents_loki .
$this->contents_sdir;
}
private static function preprocess_tag(?string $tag) {
$tag = trim($tag);
private static function fetch_source(string $source_key) {
$url = CommunitySources::SOURCES[$source_key];
if (strlen($tag) == 0) {
return $tag;
}
$tag = html_sanitize(html_entity_decode($tag));
$contents = file_get_contents($url);
log_debug($http_response_header[0]);
if ($tag[0] == '#') {
return substr($tag, 1);
if (!$contents) {
log_error("Could not fetch source from $url.");
return "";
}
return $tag;
return $contents;
}
private function sdir_process_tags(): bool {
$entry_missing_url = false;
$entry_missing_tags = false;
/**
* @param string[][] $tags Array associating room IDs to tag arrays
*/
private function add_tags(array $tags) {
foreach ($tags as $room_id => $room_tags) {
if (!isset($this->room_tags[$room_id])) {
$this->room_tags[$room_id] = [];
}
try {
$rooms = json_decode($this->contents_sdir, true, 512, JSON_THROW_ON_ERROR);
} catch (JsonException) {
return false;
$this->room_tags[$room_id] = [
...$this->room_tags[$room_id],
...$room_tags
];
}
}
foreach ($rooms as $room_entry) {
// TODO: Check types
if (!isset($room_entry['url'])) {
log_value($room_entry);
$entry_missing_url = true;
continue;
}
if (!isset($room_entry['tags'])) {
log_value($room_entry);
$entry_missing_tags = true;
continue;
}
$url = $room_entry['url'];
$tags = explode(',', $room_entry['tags']);
$room_token = url_get_token($url);
$pubkey = url_get_pubkey($url);
$pubkey_4 = substr($pubkey, 0, 4);
$room_id = "$room_token+$pubkey_4";
private function process_sources(): bool {
$source_sdir = SDIRCommunitySource::from_contents($this->contents_sdir);
$tags = array_map(function(?string $tag) {
return CommunitySources::preprocess_tag($tag);
}, $tags);
$source_asgl = ASGLCommunitySource::from_contents($this->contents_asgl);
$tags = array_filter(
$tags, function(?string $tag) {
return strlen($tag) != 0;
}
);
$source_sdir && $this->add_tags($source_sdir->get_tags());
$this->room_tags[$room_id] = $tags;
}
$source_asgl && $this->add_tags($source_asgl->get_tags());
if ($entry_missing_url) {
log_error("One or more room entries from session.directory is missing the 'url' parameter.");
if (!$source_sdir) {
return false;
}
if ($entry_missing_tags) {
log_error("One or more room entries from session.directory is missing the 'tags' parameter.");
if (!$source_asgl) {
return false;
}
return true;
@ -125,7 +295,7 @@
* Returns all join URLs found.
* @return string[] Join URLs.
*/
function get_join_urls(): array {
public function get_join_urls(): array {
return array_unique(
parse_join_links($this->contents_aggregated)
);
@ -134,14 +304,14 @@
/**
* Return known tags for the given room.
* @param string $room_id Room identifier.
* @return string[] Array of string tags.
* @return string[] Array of raw string tags.
*/
function get_room_tags($room_id): array {
public function get_room_tags($room_id): array {
if (!isset($this->room_tags[$room_id])) {
return [];
}
return array_slice($this->room_tags[$room_id], 0);
return $this->room_tags[$room_id];
}
}
?>

@ -0,0 +1,124 @@
<?php
require_once 'utils.php';
/**
 * Integer codes for the kinds of tag a CommunityTag can carry.
 * Only holds constants and cannot be instantiated.
 */
class TagType {
    /** Default type given to tags built by CommunityTag. */
    public const USER_TAG = 0;

    /** Type used for derived tags such as "official". */
    public const RESERVED_TAG = 1;

    /** Type reported as 'warning' by CommunityTag::get_tag_type(). */
    public const WARNING_TAG = 2;

    private function __construct() {}
}
/**
 * A single room tag: display text plus a TagType code.
 */
class CommunityTag implements JsonSerializable {
    /**
     * @var string[] RESERVED_TAGS
     * Array of derived tags unavailable for manual tagging.
     */
    private const RESERVED_TAGS = ["official"];

    public readonly int $type;

    public readonly string $text;

    /**
     * @param string $text Tag text.
     * @param int $tag_type One of the TagType constants.
     */
    public function __construct(string $text, int $tag_type = TagType::USER_TAG) {
        $this->text = $text;
        $this->type = $tag_type;
    }

    /**
     * Returns a lowercase representation of the tag for purposes of de-duping.
     */
    public function __toString(): string {
        return strtolower($this->text);
    }

    /**
     * Serializes a user tag as its text.
     * @throws LogicException When the tag is not a user tag.
     */
    public function jsonSerialize(): mixed {
        if ($this->type != TagType::USER_TAG) {
            throw new LogicException("Should not serialize derived tags.");
        }
        return $this->text;
    }

    /**
     * Normalizes raw tag text: trims it, decodes and re-sanitizes HTML,
     * then drops one leading '#'.
     * @param ?string $tag Raw tag text; null is treated as empty.
     * @return string Normalized text, possibly empty.
     */
    private static function preprocess_tag(?string $tag) {
        $tag = trim($tag ?? '');

        if ($tag === '') {
            return $tag;
        }

        $tag = html_sanitize(html_entity_decode($tag));

        // str_starts_with is safe on an empty string, unlike $tag[0].
        if (str_starts_with($tag, '#')) {
            // Trim again so "# tag" does not keep its inner leading space.
            return trim(substr($tag, 1));
        }

        return $tag;
    }

    /**
     * Builds user tags from raw texts, dropping those that normalize to nothing.
     * Elements that already are CommunityTag objects contribute their text
     * as-is, so their casing survives instead of being lowercased by
     * string conversion.
     * @param (string|\CommunityTag)[] $tag_array
     * @return \CommunityTag[] List of tags with consecutive keys.
     */
    private static function from_tag_array(array $tag_array) {
        $texts = array_map(static function($tag) {
            $raw = $tag instanceof CommunityTag ? $tag->text : $tag;
            return CommunityTag::preprocess_tag($raw);
        }, $tag_array);

        $texts = array_filter(
            $texts,
            static fn($text) => strlen($text) != 0
        );

        // array_filter keeps the original keys; re-index so that the result
        // is a list and JSON-encodes as an array.
        return array_values(array_map(
            static fn(string $text) => new CommunityTag($text),
            $texts
        ));
    }

    /**
     * Returns the user tags given, without any reserved tags.
     * Reserved names are matched after normalization, so variants such as
     * "#official" or " Official " are rejected too.
     * @param string[] $tags
     * @return \CommunityTag[] List of tags with consecutive keys.
     */
    public static function from_user_tags(array $tags): array {
        $candidates = CommunityTag::from_tag_array($tags);

        return array_values(array_filter(
            $candidates,
            static fn(CommunityTag $tag) => !CommunityTag::is_reserved_tag($tag->text)
        ));
    }

    /**
     * @param string[] $details_array Array of string tags.
     * @return \CommunityTag[]
     */
    public static function from_details_array(array $details_array): array {
        return CommunityTag::from_user_tags($details_array);
    }

    /**
     * Removes tags whose lowercase text duplicates an earlier tag.
     * @param \CommunityTag[] $tags
     * @return \CommunityTag[] List of tags with consecutive keys.
     */
    public static function dedupe_tags(array $tags) {
        // array_unique compares the string form of each tag (__toString).
        return array_values(array_unique($tags));
    }

    /**
     * @return string Name of this tag's type: 'user', 'reserved' or 'warning'.
     */
    public function get_tag_type(): string {
        return match($this->type) {
            TagType::USER_TAG => 'user',
            TagType::RESERVED_TAG => 'reserved',
            TagType::WARNING_TAG => 'warning'
        };
    }

    /**
     * Checks whether the given manual tag can be accepted.
     * The comparison ignores letter case.
     */
    public static function is_reserved_tag(string $tag): bool {
        return in_array(strtolower($tag), CommunityTag::RESERVED_TAGS, true);
    }
}
?>

@ -122,9 +122,10 @@
* to an unreachable host.
*/
function curl_get_response(string $url, int $retries, $stop_on_codes = [404], $curlopts = []) {
global $FAST_FETCH_MODE;
// use separate timeouts to reliably get data from Chinese server with repeated tries
$connecttimeout = 2; // wait at most X seconds to connect
$timeout = 3; // can't take longer than X seconds for the whole curl process
$timeout = $FAST_FETCH_MODE ? 1.5 : 3; // can't take longer than X seconds for the whole curl process
$sleep = 2; // sleep between tries in seconds
// takes at most ($timeout + $sleep) * $retries seconds
@ -200,6 +201,18 @@
return $query_components['public_key'];
}
/**
 * Builds the room identifier for a Session Community join URL.
 * The identifier is the room token, a '+' sign, and the first four
 * characters of the server public key.
 * @param string $join_url Join URL for Session Community.
 * @return string Room identifier per our format.
 */
function url_get_room_id(string $join_url) {
    $token = url_get_token($join_url);
    $key_prefix = substr(url_get_pubkey($join_url), 0, 4);

    return $token . '+' . $key_prefix;
}
/**
* Extracts join links that match $REGEX_JOIN_LINK.
* @param ?string $html Text to find join URLs in.

@ -75,23 +75,23 @@
<td class="td_name"
title="Click here to preview '<?=$name?>'"
data-sort-by="<?=strtolower($name)?>"
itemprop="name"
>
<a
href="<?=$preview_link?>"
target="_blank"
title="'<?=$name?>' preview"
rel="noopener noreferrer"
itemprop="name"
><?=
$name
?></a>
<span class="room-labels-container">
<?php foreach ($room->get_room_tags() as $tag): ?>
<span
class="room-label badge<?=CommunityRoom::is_reserved_tag($tag) ? ' badge-special' : ''?>"
class="room-label room-label-<?=$tag->get_tag_type()?> badge"
title="<?=$tag?>"
><?=
truncate($tag, 16)
truncate($tag->text, 16)
?></span>
<?php endforeach; ?>
</span>

Loading…
Cancel
Save