Speed up URL retrieval

This commit is contained in:
EonaCat 2023-07-19 17:15:22 +02:00
parent 6ae0e0b779
commit e552bf1925
2 changed files with 57 additions and 65 deletions

View File

@ -1,4 +1,5 @@
using System; using System;
using System.Collections.Concurrent;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net.Http; using System.Net.Http;
@ -12,6 +13,7 @@ namespace EonaCat.DnsTester.Helpers
{ {
private static readonly RandomNumberGenerator RandomNumberGenerator = RandomNumberGenerator.Create(); private static readonly RandomNumberGenerator RandomNumberGenerator = RandomNumberGenerator.Create();
public static event EventHandler<string> Log; public static event EventHandler<string> Log;
public static bool UseSearchEngineYahoo { get; set; } public static bool UseSearchEngineYahoo { get; set; }
public static bool UseSearchEngineBing { get; set; } public static bool UseSearchEngineBing { get; set; }
public static bool UseSearchEngineGoogle { get; set; } public static bool UseSearchEngineGoogle { get; set; }
@ -20,82 +22,70 @@ namespace EonaCat.DnsTester.Helpers
public static bool UseSearchEngineStartPage { get; set; } public static bool UseSearchEngineStartPage { get; set; }
public static bool UseSearchEngineYandex { get; set; } public static bool UseSearchEngineYandex { get; set; }
private static async Task<List<string>> GetRandomUrlsAsync(int totalUrls) private static async Task<List<string>> GetRandomUrlsAsync(int totalUrls)
{ {
var urls = new ConcurrentDictionary<string, byte>();
var letters = GetRandomLetters(); var letters = GetRandomLetters();
var searchEngineUrls = GetSearchEngines(); var searchEngineUrls = GetSearchEngines().ToList();
var rand = new Random(); var random = new Random();
var urls = new List<string>();
while (urls.Count < totalUrls) while (urls.Count < totalUrls && searchEngineUrls.Count > 0)
{ {
var index = rand.Next(searchEngineUrls.Count); await Task.Run(async () =>
var searchEngine = searchEngineUrls.ElementAt(index); {
var index = random.Next(searchEngineUrls.Count);
var searchEngine = searchEngineUrls[index];
var url = searchEngine.Value + letters; var url = searchEngine.Value + letters;
using (var httpClient = new HttpClient())
{
try try
{ {
var httpClient = new HttpClient();
var response = await httpClient.GetAsync(url).ConfigureAwait(false); var response = await httpClient.GetAsync(url).ConfigureAwait(false);
if (response.IsSuccessStatusCode) if (response.IsSuccessStatusCode)
{ {
var responseString = await response.Content.ReadAsStringAsync().ConfigureAwait(false); var responseString = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
// find all .xxx.com addresses
var hostNames = Regex.Matches(responseString, @"[.](\w+[.]com)"); var hostNames = Regex.Matches(responseString, @"[.](\w+[.]com)");
// Loop through the match collection to retrieve all matches and delete the leading "."
var uniqueNames = new HashSet<string>();
foreach (Match match in hostNames) foreach (Match match in hostNames)
{ {
var name = match.Groups[1].Value; var name = match.Groups[1].Value;
if (name != $"{searchEngine.Key.ToLower()}.com") if (name == $"{searchEngine.Key.ToLower()}.com") continue;
{
uniqueNames.Add(name);
}
}
// Add the names to the list urls.TryAdd(name, 0); // TryAdd is thread-safe
foreach (var name in uniqueNames)
{
if (urls.Count >= totalUrls) if (urls.Count >= totalUrls)
{ {
break; break;
} }
if (!urls.Contains(name))
{
urls.Add(name);
}
} }
} }
else else
{ {
// Handle non-successful status codes (optional) searchEngineUrls.RemoveAt(index);
searchEngineUrls.Remove(searchEngine.Key);
SetStatus($"{searchEngine.Key}: {response.StatusCode}"); SetStatus($"{searchEngine.Key}: {response.StatusCode}");
} }
httpClient.Dispose();
} }
catch (Exception ex) catch (Exception ex)
{ {
// Handle exceptions (optional) searchEngineUrls.RemoveAt(index);
searchEngineUrls.Remove(searchEngine.Key);
SetStatus($"{searchEngine.Key}: {ex.Message}"); SetStatus($"{searchEngine.Key}: {ex.Message}");
} }
}
letters = GetRandomLetters(); letters = GetRandomLetters();
await Task.Delay(100).ConfigureAwait(false); await Task.Delay(100).ConfigureAwait(false);
}).ConfigureAwait(false);
} }
var urlText = "url" + (urls.Count > 1 ? "'s" : string.Empty); var urlText = "url" + (urls.Count > 1 ? "'s" : string.Empty);
SetStatus($"{urls.Count} random {urlText} found"); SetStatus($"{urls.Count} random {urlText} found");
return urls; return urls.Keys.ToList();
} }
private static Dictionary<string, string> GetSearchEngines() private static Dictionary<string, string> GetSearchEngines()
{ {
var searchEngineUrls = new Dictionary<string, string>(); var searchEngineUrls = new Dictionary<string, string>();
@ -139,22 +129,24 @@ namespace EonaCat.DnsTester.Helpers
public static async Task<List<string>> RetrieveUrlsAsync(int numThreads, int numUrlsPerThread) public static async Task<List<string>> RetrieveUrlsAsync(int numThreads, int numUrlsPerThread)
{ {
var tasks = new Task[numThreads]; var tasks = new List<Task<List<string>>>();
var urlList = new List<string>(); // Start each task to retrieve a subset of unique URLs
// start each thread to retrieve a subset of unique URLs
for (var i = 0; i < numThreads; i++) for (var i = 0; i < numThreads; i++)
{ {
tasks[i] = Task.Run(async () => urlList.AddRange(await GetRandomUrlsAsync(numUrlsPerThread).ConfigureAwait(false))); tasks.Add(GetRandomUrlsAsync(numUrlsPerThread));
} }
// wait for all threads to complete // Wait for all tasks to complete
await Task.WhenAll(tasks).ConfigureAwait(false); var results = await Task.WhenAll(tasks).ConfigureAwait(false);
// Flatten the results from all tasks into a single list
var urlList = results.SelectMany(urls => urls).ToList();
return urlList; return urlList;
} }
private static string GetRandomLetters() private static string GetRandomLetters()
{ {
// Generate a cryptographically strong random string // Generate a cryptographically strong random string

View File

@ -43,10 +43,8 @@ namespace EonaCat.DnsTester
return; return;
} }
var urls = new List<string>();
SetupView(); SetupView();
var numThreads = (int)numericUpDown2.Value; // number of concurrent threads to use var numThreads = (int)numericUpDown2.Value; // number of concurrent threads to use
var maxUrls = (int)numericUpDown1.Value; // maximum number of unique URLs to retrieve var maxUrls = (int)numericUpDown1.Value; // maximum number of unique URLs to retrieve
var numUrlsPerThread = maxUrls / numThreads; var numUrlsPerThread = maxUrls / numThreads;
@ -57,12 +55,14 @@ namespace EonaCat.DnsTester
} }
SetSearchEngines(); SetSearchEngines();
urls = await UrlHelper.RetrieveUrlsAsync(numThreads, numUrlsPerThread).ConfigureAwait(false); var urls = await UrlHelper.RetrieveUrlsAsync(numThreads, numUrlsPerThread).ConfigureAwait(false);
AddUrlToView(urls); AddUrlToView(urls);
IsRunning = true; IsRunning = true;
RunTest.Invoke(() => { RunTest.Enabled = false; });
await ProcessAsync(_recordType, urls.ToArray(), _dnsServer1, _dnsServer2).ConfigureAwait(false); await ProcessAsync(_recordType, urls.ToArray(), _dnsServer1, _dnsServer2).ConfigureAwait(false);
IsRunning = false; IsRunning = false;
RunTest.Invoke(() => { RunTest.Enabled = true; });
} }
private void SetSearchEngines() private void SetSearchEngines()