radiostasis/scripts/populate-links.csx

134 lines
4 KiB
Text
Raw Normal View History

2023-04-06 15:51:53 -05:00
#!/usr/bin/env dotnet-script
#nullable enable
#r "nuget: GoogleApi, 4.5.4"
#r "nuget: Microsoft.Data.Sqlite, 7.0.4"
/* This script loops through each series and does a Google search,
* then presents the first 20 results to the user. The user can then
* specify which urls to save in the database for that series. Those
* urls will then be used by the generate-descriptions.csx script to
* generate series synopses using ChatGPT. */
2023-04-06 15:51:53 -05:00
using System.Text.Json;
using GoogleApi;
using GoogleApi.Entities.Common.Enums;
using GoogleApi.Entities.Search.Common;
using GoogleApi.Entities.Search.Web.Request;
using Microsoft.Data.Sqlite;
using SQLitePCL;
// Parent of the current working directory, resolved to an absolute path.
// NOTE(review): this presumably means the script must be launched from the
// scripts/ directory so that BASE_PATH is the repository root — confirm.
private readonly string BASE_PATH = Path.GetFullPath("..");
// SQLite connection string pointing at <BASE_PATH>/db/radiostasis.db.
private readonly string CONNECTION_STRING =
$"Data Source={Path.Combine(BASE_PATH, "db", "radiostasis.db")}";
// Raw text of <BASE_PATH>/scripts/config.json, read once at startup.
private string configJson = File.ReadAllText(
Path.Combine(BASE_PATH, "scripts", "config.json"));
// Settings parsed from config.json as a flat string-to-string map; falls back
// to an empty map when the deserializer returns null. DoQuery reads the
// "googleApiKey" and "googleSearchId" entries from it.
private Dictionary<string, string> config =
JsonSerializer.Deserialize<Dictionary<string, string>>(configJson)
?? new();
/// <summary>
/// Runs one page (10 hits) of a Google web search for <paramref name="query"/>.
/// </summary>
/// <param name="query">Search text sent to the search API.</param>
/// <param name="page">1-based page number; page 1 starts at result 1, page 2 at result 11, and so on.</param>
/// <returns>
/// One tuple per hit: (result number, link, title). Numbering starts at the
/// page's start index and increases by one per hit.
/// </returns>
/// <exception cref="Exception">
/// The response is null, its status is not Ok, or it carries no item list.
/// </exception>
private IEnumerable<(int, string, string)> DoQuery(string query, int page = 1) {
  // Start index of the first hit on the requested page (1, 11, 21, ...).
  var firstNumber = ((page - 1) * 10) + 1;

  var searchRequest = new WebSearchRequest {
    Key = config["googleApiKey"],
    SearchEngineId = config["googleSearchId"],
    Query = query,
    Options = new SearchOptions {
      Number = 10,
      StartIndex = firstNumber,
    },
  };

  var searchResponse = GoogleSearch.WebSearch.Query(searchRequest);
  if (searchResponse == null) {
    throw new Exception("response was null");
  }
  if (searchResponse.Status != Status.Ok) {
    throw new Exception("response was not ok");
  }
  if (searchResponse.Items == null) {
    throw new Exception("response.Items was null");
  }

  // Number the hits consecutively from the page's start index and
  // materialize them so callers can enumerate the result repeatedly.
  return searchResponse.Items
    .Select((hit, offset) => (firstNumber + offset, hit.Link, hit.Title))
    .ToList();
}
/// <summary>
/// Shows the first two pages of search hits for <paramref name="query"/> and
/// asks the user which of them to keep for the show.
/// </summary>
/// <param name="showName">Show title, echoed in the prompt so the user knows what is being linked.</param>
/// <param name="query">Search text passed to <c>DoQuery</c>.</param>
/// <returns>
/// The selected links, de-duplicated and sorted; empty when the user just
/// presses Enter (or input ends).
/// </returns>
/// <remarks>
/// Entering <c>r</c> asks for a replacement query and starts over with it.
/// Anything that is not a comma-separated list of the displayed numbers is
/// rejected and the prompt is repeated, so a typo cannot abort the run.
/// </remarks>
private IEnumerable<string> GetSearchUrls(string showName, string query) {
  // Map each *displayed* number to its link. Looking selections up by this
  // number (rather than by list position) stays correct when the first page
  // returns fewer than 10 hits and the numbering therefore has a gap.
  var linksByNumber = new Dictionary<int, string>();
  foreach (var (shownNumber, link, title) in DoQuery(query).Concat(DoQuery(query, 2))) {
    WriteLine($"{shownNumber} - {link} [{title}]");
    linksByNumber[shownNumber] = link;
  }
  WriteLine();
  WriteLine($"The show is '{showName}'.");

  while (true) {
    Write("Urls (eg. 1,3,5) or 'r' to retry: ");
    var use = ReadLine();
    if (use == "r") {
      Write("Retry with query: ");
      var newQuery = ReadLine() ?? query;
      return GetSearchUrls(showName, newQuery);
    }
    if (string.IsNullOrEmpty(use)) {
      return Array.Empty<string>();
    }

    // Validate eagerly: every token must be one of the numbers listed above.
    var chosen = new List<string>();
    string? rejected = null;
    var tokens = use.Split(
      ',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    foreach (var token in tokens) {
      if (!int.TryParse(token, out var picked)
          || !linksByNumber.TryGetValue(picked, out var pickedLink)) {
        rejected = token;
        break;
      }
      chosen.Add(pickedLink);
    }

    if (rejected == null) {
      // Materialize so no parsing work is deferred into the caller, and drop
      // repeats so the same link is not stored twice for one show.
      return chosen.Distinct().Order().ToList();
    }
    WriteLine($"'{rejected}' is not one of the listed numbers; please try again.");
  }
}
/// <summary>
/// Prompts for links for <paramref name="showName"/>, seeding the search with
/// the show title followed by "old time radio show".
/// </summary>
/// <param name="showName">Title of the show to search for.</param>
/// <returns>Whatever links <c>GetSearchUrls</c> returns for that query.</returns>
private IEnumerable<string> GetShowUrls(string showName) =>
  GetSearchUrls(showName, $"{showName} old time radio show");
/// <summary>
/// Finds every series that has no rows in <c>series_links</c>, asks the user
/// to pick links for each one, and stores the picks.
/// </summary>
private void PopulateMissingUrls() {
  // Reads (slug, title) for all link-less series. The connection, command
  // and reader are disposed when this helper returns, i.e. before any
  // interactive prompting starts.
  List<(string, string)> LoadSeriesWithoutLinks() {
    var pending = new List<(string, string)>();
    using var db = new SqliteConnection(CONNECTION_STRING);
    db.Open();
    using var command = db.CreateCommand();
    command.CommandText =
@"select series_slug, title from series s
where not exists (
select 1 from series_links
where series_slug=s.series_slug)";
    using var rows = command.ExecuteReader();
    while (rows.Read()) {
      pending.Add((rows.GetString(0), rows.GetString(1)));
    }
    return pending;
  }

  foreach (var (slug, title) in LoadSeriesWithoutLinks()) {
    SaveUrls(slug, GetShowUrls(title));
  }
}
/// <summary>
/// Inserts one <c>series_links</c> row per url for the given series.
/// </summary>
/// <param name="slug">Value stored in <c>series_slug</c>.</param>
/// <param name="urls">Values stored in <c>link_url</c>, one row each.</param>
/// <remarks>
/// All rows are written inside a single transaction. If any insert (or the
/// enumeration of <paramref name="urls"/>) fails, nothing is committed, so
/// the series keeps having no links and is picked up again on the next run
/// instead of being left with a partial set.
/// </remarks>
private void SaveUrls(string slug, IEnumerable<string> urls) {
  using var connection = new SqliteConnection(CONNECTION_STRING);
  connection.Open();
  // Disposing an uncommitted transaction rolls it back.
  using var transaction = connection.BeginTransaction();

  // One command, reused for every row; only the @url value changes.
  using var cmd = connection.CreateCommand();
  cmd.Transaction = transaction;
  cmd.CommandText =
@"insert into series_links (series_slug, link_url)
values (@slug, @url)";
  cmd.Parameters.AddWithValue("@slug", slug);
  var urlParam = cmd.Parameters.Add("@url", SqliteType.Text);

  foreach (var url in urls) {
    urlParam.Value = url;
    cmd.ExecuteNonQuery();
  }

  transaction.Commit();
}
// Script entry point.
// NOTE(review): Batteries.Init() comes from SQLitePCL and presumably sets up
// the native SQLite provider before any connection is opened — confirm.
Batteries.Init();
// Prompt for, and store, links for every series that has none yet.
PopulateMissingUrls();