More work on #5

This commit is contained in:
Timothy Warren 2018-10-01 10:50:22 -04:00
parent 6f717e6ab7
commit 17a9539e94
4 changed files with 177 additions and 80 deletions

View File

@ -66,6 +66,18 @@ class APIRequestBuilder {
*/ */
protected $request; protected $request;
/**
 * Build a basic, minimal GET request for the given URI
 *
 * The only customization applied is a browser-style User-Agent header.
 *
 * @param string $uri
 * @return Request
 */
public static function simpleRequest(string $uri): Request
{
	$userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0 ';
	$request = new Request($uri);

	return $request->withHeader('User-Agent', $userAgent);
}
/** /**
* Set an authorization header * Set an authorization header
* *

View File

@ -63,7 +63,7 @@ final class ParallelAPIRequest {
} }
/** /**
* Actually make the requests * Make the requests, and return the body for each
* *
* @return array * @return array
* @throws \Throwable * @throws \Throwable
@ -83,4 +83,25 @@ final class ParallelAPIRequest {
return wait(all($promises)); return wait(all($promises));
} }
/**
 * Send every queued request and return the response objects
 *
 * Each promise is stored under the key of the queued request it was
 * created for, and all of them are awaited together.
 *
 * @return array
 * @throws \Throwable
 */
public function getResponses(): array
{
	$httpClient = new HummingbirdClient();

	$pending = [];
	foreach ($this->requests as $name => $request)
	{
		$pending[$name] = call(function () use ($httpClient, $request) {
			$response = yield $httpClient->request($request);
			return $response;
		});
	}

	$combined = all($pending);

	return wait($combined);
}
} }

View File

@ -16,11 +16,14 @@
namespace Aviat\AnimeClient\Command; namespace Aviat\AnimeClient\Command;
use const Aviat\AnimeClient\MILLI_FROM_NANO;
use const Aviat\AnimeClient\SRC_DIR;
use function Amp\Promise\wait;
use Aviat\AnimeClient\API\{ use Aviat\AnimeClient\API\{
APIRequestBuilder, APIRequestBuilder,
JsonAPI, JsonAPI,
FailedResponseException,
ParallelAPIRequest ParallelAPIRequest
}; };
@ -38,6 +41,7 @@ final class MALIDCheck extends BaseCommand {
* @param array $options * @param array $options
* @throws \Aviat\Ion\Di\Exception\ContainerException * @throws \Aviat\Ion\Di\Exception\ContainerException
* @throws \Aviat\Ion\Di\Exception\NotFoundException * @throws \Aviat\Ion\Di\Exception\NotFoundException
* @throws \Throwable
*/ */
public function execute(array $args, array $options = []): void public function execute(array $args, array $options = []): void
{ {
@ -45,30 +49,25 @@ final class MALIDCheck extends BaseCommand {
$this->setCache($this->container->get('cache')); $this->setCache($this->container->get('cache'));
$this->kitsuModel = $this->container->get('kitsu-model'); $this->kitsuModel = $this->container->get('kitsu-model');
// @TODO: Stuff! $kitsuAnimeIdList = $this->formatKitsuList('anime');
} $animeCount = count($kitsuAnimeIdList);
$this->echoBox("{$animeCount} mappings for Anime");
$animeMappings = $this->checkMALIds($kitsuAnimeIdList, 'anime');
$this->mappingStatus($animeMappings, $animeCount, 'anime');
private function getListIds() $kitsuMangaIdList = $this->formatKitsuList('manga');
{ $mangaCount = count($kitsuMangaIdList);
$this->getListCounts('anime'); $this->echoBox("{$mangaCount} mappings for Manga");
$this->getListCounts('manga'); $mangaMappings = $this->checkMALIds($kitsuMangaIdList, 'manga');
$this->mappingStatus($mangaMappings, $mangaCount, 'manga');
} $publicDir = realpath(SRC_DIR . '/../public') . '/';
file_put_contents($publicDir . 'mal_mappings.json', Json::encode([
'anime' => $animeMappings,
'manga' => $mangaMappings,
]));
private function getListCounts($type): void $this->echoBox('Mapping file saved to "' . $publicDir . 'mal_mappings.json' . '"');
{
$uType = ucfirst($type);
$kitsuCount = 0;
try
{
$kitsuCount = $this->kitsuModel->{"get{$uType}ListCount"}();
} catch (FailedResponseException $e)
{
dump($e);
}
$this->echoBox("Number of Kitsu {$type} list items: {$kitsuCount}");
} }
/** /**
@ -77,9 +76,12 @@ final class MALIDCheck extends BaseCommand {
* @param string $type * @param string $type
* @return array * @return array
*/ */
protected function formatKitsuList(string $type = 'anime'): array private function formatKitsuList(string $type = 'anime'): array
{ {
$data = $this->kitsuModel->{'getFull' . ucfirst($type) . 'List'}(); $options = [
'include' => 'media,media.mappings',
];
$data = $this->kitsuModel->{'getFullRaw' . ucfirst($type) . 'List'}($options);
if (empty($data)) if (empty($data))
{ {
@ -87,15 +89,23 @@ final class MALIDCheck extends BaseCommand {
} }
$includes = JsonAPI::organizeIncludes($data['included']); $includes = JsonAPI::organizeIncludes($data['included']);
$includes['mappings'] = $this->filterMappings($includes['mappings'], $type);
// Only bother with mappings from MAL that are of the specified media type
$includes['mappings'] = array_filter($includes['mappings'], function ($mapping) use ($type) {
return $mapping['externalSite'] === "myanimelist/{$type}";
});
$output = []; $output = [];
foreach ($data['data'] as $listItem) foreach ($data['data'] as $listItem)
{ {
$id = $listItem['relationships'][$type]['data']['id']; $id = $listItem['relationships']['media']['data']['id'];
$mediaItem = $includes[$type][$id];
$potentialMappings = $includes[$type][$id]['relationships']['mappings']; // Set titles
$listItem['titles'] = $mediaItem['titles'];
$potentialMappings = $mediaItem['relationships']['mappings'];
$malId = NULL; $malId = NULL;
foreach ($potentialMappings as $mappingId) foreach ($potentialMappings as $mappingId)
@ -112,81 +122,132 @@ final class MALIDCheck extends BaseCommand {
continue; continue;
} }
$output[$listItem['id']] = [ // Group by malIds to simplify lookup of media details
'id' => $listItem['id'], // for checking validity of the malId mappings
'malId' => $malId, $output[$malId] = $listItem;
'data' => $listItem['attributes'], }
ksort($output);
return $output;
}
/**
 * Check for valid Kitsu -> MAL mapping
 *
 * Requests the MAL page for every id in the list and sorts each id into
 * one of three buckets:
 *  - 'good': the page body contains one of the Kitsu titles
 *            (value is the first title that matched)
 *  - 'bad': the page request returned a 404 (value is the Kitsu titles)
 *  - 'suspect': the page exists, but none of the Kitsu titles were found
 *               in it (value is the Kitsu titles)
 *
 * @param array $kitsuList List items keyed by MAL id, each with a 'titles' entry
 * @param string $type Media type, passed through to the MAL requests
 * @return array Map with the keys 'good', 'bad', and 'suspect'
 * @throws \Throwable
 */
private function checkMALIds(array $kitsuList, string $type): array
{
	$goodMappings = [];
	$badMappings = [];
	$suspectMappings = [];

	$responses = $this->makeMALRequests(array_keys($kitsuList), $type);

	foreach ($responses as $id => $response)
	{
		// The body is always read, even for a 404, as the original code did
		$body = wait($response->getBody());
		$titles = $kitsuList[$id]['titles'];

		// If the page returns a 404, the mapping is broken
		if ($response->getStatus() === 404)
		{
			$badMappings[$id] = $titles;
			continue;
		}

		// Otherwise, search the page for any of the titles, to see if
		// the mapping seems valid
		$matchedTitle = NULL;
		foreach ($titles as $title)
		{
			if (empty($title))
			{
				continue;
			}

			if (mb_stripos($body, $title) !== FALSE)
			{
				$matchedTitle = $title;
				break;
			}
		}

		if ($matchedTitle === NULL)
		{
			$suspectMappings[$id] = $titles;
		}
		else
		{
			$goodMappings[$id] = $matchedTitle;
		}
	}

	return [
		'good' => $goodMappings,
		'bad' => $badMappings,
		'suspect' => $suspectMappings,
	];
}
return $output; private function makeMALRequests(array $ids, string $type): array
}
/**
* Filter Kitsu mappings for the specified type
*
* @param array $includes
* @param string $type
* @return array
*/
protected function filterMappings(array $includes, string $type = 'anime'): array
{ {
$output = []; $baseUrl = "https://myanimelist.net/{$type}/";
foreach ($includes as $id => $mapping) $requestChunks = array_chunk($ids, 10, TRUE);
{ $responses = [];
if ($mapping['externalSite'] === "myanimelist/{$type}")
{
$output[$id] = $mapping;
}
}
return $output; // Chunk parallel requests so that we don't hit rate
} // limiting, and get spurious 404 HTML responses
foreach($requestChunks as $idChunk)
protected function checkMALIds(array $kitsuList, string $type)
{ {
$requester = new ParallelAPIRequest(); $requester = new ParallelAPIRequest();
foreach($idChunk as $id)
{
$request = APIRequestBuilder::simpleRequest($baseUrl . $id);
echo "Checking {$baseUrl}{$id} \n";
$requester->addRequest($request, (string)$id);
} }
/** foreach($requester->getResponses() as $id => $response)
* Create/Update list items on Kitsu
*
* @param array $itemsToUpdate
* @param string $action
* @param string $type
*/
protected function updateKitsuListItems(array $itemsToUpdate, string $action = 'update', string $type = 'anime'): void
{ {
$requester = new ParallelAPIRequest(); $responses[$id] = $response;
foreach ($itemsToUpdate as $item)
{
if ($action === 'update')
{
$requester->addRequest($this->kitsuModel->updateListItem($item));
} else if ($action === 'create')
{
$requester->addRequest($this->kitsuModel->createListItem($item));
}
} }
$responses = $requester->makeRequests(); echo "Finished checking chunk of 10 entries\n";
foreach ($responses as $key => $response) // Rate limiting is annoying :(
{ sleep(1);
$responseData = Json::decode($response); // time_nanosleep(1, 0 * MILLI_FROM_NANO);
}
$id = $itemsToUpdate[$key]['id']; return $responses;
if ( ! array_key_exists('errors', $responseData)) }
private function mappingStatus(array $mapping, int $count, string $type): void
{ {
$verb = ($action === 'update') ? 'updated' : 'created'; $good = count($mapping['good']);
$this->echoBox("Successfully {$verb} Kitsu {$type} list item with id: {$id}"); $bad = count($mapping['bad']);
} else $suspect = count($mapping['suspect']);
{
dump($responseData); $uType = ucfirst($type);
$verb = ($action === 'update') ? 'update' : 'create';
$this->echoBox("Failed to {$verb} Kitsu {$type} list item with id: {$id}"); $this->echoBox("{$uType} mappings: {$good}/{$count} Good, {$suspect}/{$count} Suspect, {$bad}/{$count} Broken");
}
}
} }
} }

View File

@ -25,3 +25,6 @@ const NOT_FOUND_METHOD = 'notFound';
const SESSION_SEGMENT = 'Aviat\AnimeClient\Auth'; const SESSION_SEGMENT = 'Aviat\AnimeClient\Auth';
const SRC_DIR = __DIR__; const SRC_DIR = __DIR__;
const USER_AGENT = "Tim's Anime Client/4.0"; const USER_AGENT = "Tim's Anime Client/4.0";
// Why doesn't this already exist?
// Number of nanoseconds in one millisecond (1,000,000): multiply a
// millisecond count by this to get a nanosecond count
const MILLI_FROM_NANO = 1000 * 1000;