Merge branch 'community-labels'
commit
564710d8b5
@ -0,0 +1,317 @@
|
||||
<?php
|
||||
require_once 'utils.php';
|
||||
require_once 'tags.php';
|
||||
|
||||
/**
 * Room tags parsed from a JSON document of session.directory room entries.
 *
 * The document is expected to decode to an array of entries, each carrying a
 * string 'url' and a string 'tags' (comma-separated). Entries that do not
 * match are logged and skipped; the remaining ones are indexed by the room ID
 * that url_get_room_id() derives from the entry URL.
 */
class SDIRCommunitySource {
    private readonly string $contents;

    /**
     * @var string[][] $tags Array associating room IDs with string tag arrays.
     *
     * Initialised eagerly so get_tags() stays callable when every entry
     * of the source was rejected.
     */
    private array $tags = [];

    private function __construct(string $contents) {
        $this->contents = $contents;
    }

    /**
     * Create new instance of this source from contents.
     * Returns false if processing the source fails.
     * @param string $contents Raw JSON document.
     * @return \SDIRCommunitySource|false
     */
    public static function from_contents(string $contents) {
        $source = new SDIRCommunitySource($contents);

        if (!$source->sdir_process_tags()) {
            return false;
        }

        return $source;
    }

    /**
     * Checks that a decoded room entry has a string 'url' and string 'tags'.
     *
     * Rejected entries are passed to log_value(). A value that is not an
     * array at all is reported as missing its 'url'.
     *
     * @param mixed $room_entry Decoded entry; anything but an array is rejected.
     * @param bool $missing_url Set to true when the entry lacks a string 'url'.
     * @param bool $missing_tags Set to true when the entry lacks a string 'tags'.
     * @return bool True if the entry can be processed.
     */
    private static function sdir_validate_entry(
        mixed $room_entry,
        bool &$missing_url,
        bool &$missing_tags
    ): bool {
        if (
            !is_array($room_entry) ||
            !isset($room_entry['url']) ||
            !is_string($room_entry['url'])
        ) {
            log_value($room_entry);
            $missing_url = true;
            return false;
        }

        if (!isset($room_entry['tags']) || !is_string($room_entry['tags'])) {
            log_value($room_entry);
            $missing_tags = true;
            return false;
        }

        return true;
    }

    /**
     * Logs one summary error per kind of problem seen while validating entries.
     */
    private static function sdir_report_errors(bool $entry_missing_url, bool $entry_missing_tags) {
        if ($entry_missing_url) {
            log_error("One or more room entries from session.directory is missing the 'url' parameter.");
        }

        if ($entry_missing_tags) {
            log_error("One or more room entries from session.directory is missing the 'tags' parameter.");
        }
    }

    /**
     * Decodes the stored contents as JSON.
     *
     * @return array|false Decoded entries, or false when the contents are not
     * valid JSON or do not decode to an array.
     */
    private function get_sdir_entries(): array|false {
        try {
            $decoded = json_decode($this->contents, true, 512, JSON_THROW_ON_ERROR);
        } catch (JsonException) {
            return false;
        }

        // Valid JSON may still be a bare scalar or null; only arrays can be iterated.
        return is_array($decoded) ? $decoded : false;
    }

    /**
     * Fills $this->tags from the decoded entries.
     *
     * @return bool False when the contents could not be decoded or decoded
     * to an empty list; true otherwise, even if some entries were skipped.
     */
    private function sdir_process_tags(): bool {
        $entry_missing_url = false;
        $entry_missing_tags = false;

        $rooms = $this->get_sdir_entries();

        // An empty list is reported the same way as a decoding failure.
        if (!$rooms) {
            log_error("Could not parse entries from session.directory.");
            return false;
        }

        foreach ($rooms as $room_entry) {
            if (!SDIRCommunitySource::sdir_validate_entry(
                $room_entry, $entry_missing_url, $entry_missing_tags
            )) {
                continue;
            }

            $room_id = url_get_room_id($room_entry['url']);

            $this->tags[$room_id] = explode(',', $room_entry['tags']);
        }

        SDIRCommunitySource::sdir_report_errors($entry_missing_url, $entry_missing_tags);

        return true;
    }

    /**
     * @return string[][] Array associating room IDs with string tag arrays.
     */
    public function get_tags(): array {
        return $this->tags;
    }
}
|
||||
|
||||
/**
 * Room tags parsed line by line from a text document.
 *
 * A line consisting solely of one join link selects a room; a following line
 * starting with "hashtag" supplies that room's tags as backtick-quoted
 * `#tag` items. A blank line clears the selected room.
 */
class ASGLCommunitySource {
    private readonly string $contents;

    /**
     * @var string[][] $tags Tag lists keyed by room ID.
     */
    private array $tags = [];

    private function __construct(string $contents) {
        $this->contents = $contents;
    }

    /**
     * Builds a source from raw contents and parses its tags.
     * @return \ASGLCommunitySource|false
     */
    public static function from_contents(string $contents) {
        $instance = new self($contents);

        return $instance->asgl_process_tags() ? $instance : false;
    }

    /**
     * Walks the contents line by line, storing tags for each room as soon as
     * both a room ID and a non-empty tag list are known.
     */
    private function asgl_process_tags(): bool {
        $current_room = null;
        $pending_tags = [];

        foreach (explode("\n", $this->contents) as $raw_line) {
            self::asgl_process_line($raw_line, $current_room, $pending_tags);

            // Nothing to store until a room is selected and has tags.
            if ($current_room == null || count($pending_tags) === 0) {
                continue;
            }

            $this->tags[$current_room] = $pending_tags;
            $current_room = null;
            $pending_tags = [];
        }

        return true;
    }

    /**
     * Updates the parser state for a single line.
     *
     * @param ?string $line Line to inspect.
     * @param ?string $last_room_id Currently selected room, updated in place.
     * @param array $room_tags Tags collected for the room, updated in place.
     */
    private static function asgl_process_line(
        ?string $line,
        ?string &$last_room_id,
        array &$room_tags
    ) {
        $line = trim($line);

        // Blank line: forget the selected room.
        if ($line === '') {
            $last_room_id = null;
            return;
        }

        $urls = parse_join_links($line);
        $is_bare_link = count($urls) == 1 && $urls[0] == $line;

        // A line that is nothing but a join link selects a new room.
        if ($is_bare_link) {
            $last_room_id = url_get_room_id($urls[0]);
            $room_tags = [];
            return;
        }

        if (str_starts_with($line, "hashtag")) {
            $room_tags = self::read_asgl_tags($line);
        }
    }

    /**
     * Extracts the text of every backtick-quoted `#tag` item on the line.
     */
    private static function read_asgl_tags(string $line): array {
        $found = [];
        preg_match_all('/`#([^`]+)`/', $line, $found);

        // Index 1 holds the first capture group, i.e. the tag without '#'.
        return $found[1];
    }

    /**
     * @return string[][] Tag lists keyed by room ID.
     */
    public function get_tags(): array {
        return $this->tags;
    }
}
|
||||
|
||||
/**
 * Fetches the known listings of Session Community join links and collects
 * the raw tags published for individual rooms.
 */
class CommunitySources {
    private const SOURCES = array(
        'ASGL' => 'https://raw.githubusercontent.com/GNU-Linux-libre/Awesome-Session-Group-List/main/README.md',
        'LOKI' => 'https://lokilocker.com/Mods/Session-Groups/wiki/Session-Open-Groups',
        'SDIR' => 'https://session.directory/?all=groups',
        'SDIR-BASE' => 'https://session.directory/',
        'SDIR-PATTERN' => '/view_session_group_user_lokinet\.php\?id=\d+/',
        'SDIR-JSON' => 'https://session.directory/scrape.php',
        'FARK' => 'https://freearkham.cc/'
    );

    private readonly string $contents_asgl;
    private readonly string $contents_loki;
    private readonly string $contents_sdir;
    private readonly string $contents_fark;
    private readonly string $contents_aggregated;

    /**
     * Array associating room identifiers with arrays of raw tags.
     * @var array<string,string[]> $room_tags
     */
    private array $room_tags = [];

    /**
     * Fetches and saves known sources of Session Community join links.
     *
     * A source that cannot be fetched contributes an empty string; tag
     * parsing failures are logged and do not abort construction.
     */
    public function __construct() {
        log_info("Requesting Awesome Session Group list...");
        $this->contents_asgl = CommunitySources::fetch_source('ASGL');

        log_info("Requesting Lokilocker Mods Open Group list...");
        $this->contents_loki = CommunitySources::fetch_source('LOKI');

        log_info("Requesting session.directory list...");
        $this->contents_sdir = CommunitySources::fetch_source('SDIR-JSON');

        log_info("Requesting FreeArkham.cc list...");
        $this->contents_fark = CommunitySources::fetch_source('FARK');

        log_info("Parsing extra information...");
        if (!$this->process_sources()) {
            log_error("Could not parse extra information from one or more sources.");
        }

        log_info('Done fetching sources.');

        $this->contents_aggregated =
            $this->contents_asgl .
            $this->contents_fark .
            $this->contents_loki .
            $this->contents_sdir;
    }

    /**
     * Downloads the source registered under the given key.
     *
     * @param string $source_key Key into SOURCES.
     * @return string Response body, or an empty string when the request
     * failed or returned nothing.
     */
    private static function fetch_source(string $source_key): string {
        $url = CommunitySources::SOURCES[$source_key];

        $contents = file_get_contents($url);

        // $http_response_header is only populated once an HTTP response was
        // received; it stays undefined when the request never got that far.
        if (isset($http_response_header[0])) {
            log_debug($http_response_header[0]);
        }

        // Compare explicitly so that a body such as "0" is not mistaken
        // for a failed request.
        if ($contents === false || $contents === '') {
            log_error("Could not fetch source from $url.");
            return "";
        }

        return $contents;
    }

    /**
     * Appends the given tags to the tags already known for each room.
     *
     * @param string[][] $tags Array associating room IDs to tag arrays
     */
    private function add_tags(array $tags): void {
        foreach ($tags as $room_id => $room_tags) {
            $this->room_tags[$room_id] = [
                ...($this->room_tags[$room_id] ?? []),
                ...$room_tags
            ];
        }
    }

    /**
     * Parses tags from the sources that provide them and merges the results.
     *
     * Tags from every source that parsed are kept even if another failed.
     *
     * @return bool True only if all tag sources were parsed.
     */
    private function process_sources(): bool {
        $source_sdir = SDIRCommunitySource::from_contents($this->contents_sdir);
        $source_asgl = ASGLCommunitySource::from_contents($this->contents_asgl);

        if ($source_sdir) {
            $this->add_tags($source_sdir->get_tags());
        }

        if ($source_asgl) {
            $this->add_tags($source_asgl->get_tags());
        }

        return $source_sdir && $source_asgl;
    }

    /**
     * Returns all join URLs found.
     * @return string[] Join URLs.
     */
    public function get_join_urls(): array {
        return array_unique(
            parse_join_links($this->contents_aggregated)
        );
    }

    /**
     * Return known tags for the given room.
     * @param string $room_id Room identifier.
     * @return string[] Raw string tags as collected from the sources;
     * empty if the room has none.
     */
    public function get_room_tags($room_id): array {
        return $this->room_tags[$room_id] ?? [];
    }
}
|
||||
?>
|
@ -0,0 +1,182 @@
|
||||
<?php
|
||||
require_once 'utils.php';
|
||||
|
||||
/**
 * Integer identifiers for the kinds of tag a CommunityTag can carry.
 * Holds constants only and cannot be instantiated.
 */
class TagType {
    /** Tag of type 'user'. */
    public const USER_TAG = 0;

    /** Tag of type 'reserved'. */
    public const RESERVED_TAG = 1;

    /** Tag of type 'warning'. */
    public const WARNING_TAG = 2;

    // Private constructor: this class is a pure constant holder.
    private function __construct() {
    }
}
|
||||
|
||||
/**
 * A single label attached to a room: its text, its TagType and a description.
 */
class CommunityTag implements JsonSerializable {
    /**
     * @var string[] RESERVED_TAGS
     * Array of derived tags unavailable for manual tagging.
     */
    private const RESERVED_TAGS = ["official", "nsfw", "modded", "not modded"];

    private const SHOWCASED_TAGS = ["official"];

    private const REDUNDANT_TAGS = ["session"];

    public const NSFW_KEYWORDS = ["nsfw", "porn", "erotic", "18+"];

    public const CHECK_MARK = "✅";

    public const WARNING_ICON = "⚠️";

    public readonly int $type;

    public readonly string $text;

    public readonly string $description;

    /**
     * @param string $text Tag text.
     * @param int $tag_type One of the TagType constants.
     * @param string $description Description; defaults to "Tag: $text"
     * when an empty string is given.
     */
    public function __construct(
        string $text,
        int $tag_type = TagType::USER_TAG,
        string $description = ""
    ) {
        $this->text = $text;
        $this->type = $tag_type;
        // Compare against '' rather than empty() so that "0" remains usable.
        $this->description =
            $description === "" ? "Tag: $text" : $description;
    }

    /**
     * Returns a lowercase representation of the tag for purposes of de-duping.
     */
    public function __toString(): string {
        return strtolower($this->text);
    }

    /**
     * Serializes the tag with text and description passed through
     * html_sanitize() and the type given as its string name.
     */
    public function jsonSerialize(): mixed {
        // Only used for passing to DOM
        $details = get_object_vars($this);
        $details['text'] = html_sanitize($details['text']);
        $details['description'] = html_sanitize($details['description']);
        $details['type'] = $this->get_tag_type();
        return $details;
    }

    /**
     * Returns the lowercase text of every user tag, as a plain list.
     *
     * @param \CommunityTag[] $tags
     * @return string[]
     */
    public static function cacheable_room_tags(array $tags) {
        $user_tags = array_filter(
            $tags,
            static fn(\CommunityTag $tag): bool => $tag->type === TagType::USER_TAG
        );

        // __toString() is an instance method and cannot serve as a static
        // string callable, so cast each tag instead. array_values() drops
        // the gaps left by array_filter().
        return array_values(array_map(
            static fn(\CommunityTag $tag): string => (string) $tag,
            $user_tags
        ));
    }

    /**
     * Normalizes a raw tag: trims it, decodes HTML entities, passes it through
     * html_sanitize() and drops one leading '#'.
     *
     * @param ?string $tag Raw tag; null is treated as an empty tag.
     * @return string Normalized tag, possibly empty.
     */
    private static function preprocess_tag(?string $tag): string {
        $tag = trim($tag ?? '');

        if ($tag === '') {
            return $tag;
        }

        $tag = (string) html_sanitize(html_entity_decode($tag));

        // str_starts_with() is safe even if sanitizing left an empty string.
        if (str_starts_with($tag, '#')) {
            return substr($tag, 1);
        }

        return $tag;
    }

    /**
     * Builds tags from raw strings, skipping those that normalize to nothing.
     * Keys of the input array are preserved.
     *
     * @param string[] $tag_array
     * @return \CommunityTag[]
     */
    private static function from_tag_array(array $tag_array) {
        $normalized = array_map(
            static fn(?string $tag): string => CommunityTag::preprocess_tag($tag),
            $tag_array
        );

        $non_empty = array_filter(
            $normalized,
            static fn(string $tag): bool => $tag !== ''
        );

        return array_map(
            static fn(string $tag): \CommunityTag => new CommunityTag($tag),
            $non_empty
        );
    }

    /**
     * Returns the user tags given, without any reserved tags.
     *
     * Reserved tags are detected on the normalized text, so surrounding
     * whitespace or a leading '#' does not let a reserved tag through.
     *
     * @param string[] $tags
     * @param bool $remove_redundant Removes duplicate and obvious tags.
     * @return \CommunityTag[]
     */
    public static function from_user_tags(
        array $tags, bool $remove_redundant = false
    ): array {
        $tags_built = array_filter(
            CommunityTag::from_tag_array($tags),
            static fn(\CommunityTag $tag): bool =>
                !CommunityTag::is_reserved_tag($tag->text)
        );

        if ($remove_redundant) {
            $tags_built = CommunityTag::dedupe_tags($tags_built);
            $tags_built = array_filter(
                $tags_built,
                static fn(\CommunityTag $tag): bool =>
                    !in_array(strtolower($tag->text), CommunityTag::REDUNDANT_TAGS, true)
            );
        }

        return $tags_built;
    }

    /**
     * @param string[] $details_array Array of string tags.
     * @return \CommunityTag[]
     */
    public static function from_details_array(array $details_array): array {
        return CommunityTag::from_user_tags($details_array);
    }

    /**
     * Removes tags whose lowercase text duplicates an earlier tag.
     *
     * @param \CommunityTag[] $tags
     * @return \CommunityTag[]
     */
    public static function dedupe_tags(array $tags) {
        // array_unique() compares the string form of each tag, i.e. __toString().
        return array_unique($tags);
    }

    /**
     * Returns the class name(s) for this tag: "room-label-<type>", plus
     * " room-label-showcased" for showcased tags.
     */
    public function get_tag_classname(): string {
        $tag_type = $this->get_tag_type();
        $classname = "room-label-$tag_type";
        if (CommunityTag::is_showcased_tag($this->text)) {
            $classname .= " room-label-showcased";
        }
        return $classname;
    }

    /**
     * Returns the string name of this tag's type.
     * Throws \UnhandledMatchError for a type outside the TagType constants.
     */
    public function get_tag_type(): string {
        return match($this->type) {
            TagType::USER_TAG => 'user',
            TagType::RESERVED_TAG => 'reserved',
            TagType::WARNING_TAG => 'warning'
        };
    }

    /**
     * Checks whether the given tag is reserved (case-insensitive) and
     * therefore unavailable for manual tagging.
     */
    public static function is_reserved_tag(string $tag): bool {
        return in_array(strtolower($tag), CommunityTag::RESERVED_TAGS, true);
    }

    /**
     * Checks whether the given tag is showcased (case-insensitive).
     */
    public static function is_showcased_tag(string $tag): bool {
        return in_array(strtolower($tag), CommunityTag::SHOWCASED_TAGS, true);
    }
}
|
||||
?>
|
Loading…
Reference in New Issue