Skip to content

Commit bc60381

Browse files
committed
Update, fixes
Fixed #8. Fixed #7. Fixed a bug in the parsing of the domain option. Added a referer option, which works very much like the domains option, except that it is matched against the Referer field of the request headers. Thus it is possible to have a global referer rule, domain-specific referer rules, and so on.
1 parent 53ef810 commit bc60381

6 files changed

Lines changed: 225 additions & 43 deletions

File tree

DistillNET/DistillNET/DistillNET.nuspec

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
<package >
33
<metadata>
44
<id>DistillNET</id>
5-
<version>1.1.4</version>
5+
<version>1.3.1</version>
66
<title>DistillNET</title>
77
<authors>TechnikEmpire</authors>
8-
<releaseNotes>Modified in memory connection string, as well as filter lookup API, to enable threaded reads. Connections are now created on read (pooled) to comply with one transaction per connection.</releaseNotes>
8+
<releaseNotes>Fixed an error in the parsing of the domains option. Added a new referer option.</releaseNotes>
99
<owners>TechnikEmpire</owners>
1010
<projectUrl>https://github.com/TechnikEmpire/DistillNET</projectUrl>
1111
<requireLicenseAcceptance>true</requireLicenseAcceptance>

DistillNET/DistillNET/DistillNET/AbpFormatRuleParser.cs

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,11 @@ private Filter ParseUrlFilter(string rule, int optionsStartOffset, bool hasOptio
227227
string originalRuleCopy = rule;
228228

229229
string[] allOptions = null;
230-
List<string> applicableDomains = new List<string>();
231-
List<string> exceptionDomains = new List<string>();
230+
HashSet<string> applicableReferers = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
231+
HashSet<string> exceptReferers = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
232+
233+
HashSet<string> applicableDomains = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
234+
HashSet<string> exceptionDomains = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
232235

233236
// Trim off the leading "@@" characters if it's an exception.
234237
if(isException)
@@ -250,16 +253,24 @@ private Filter ParseUrlFilter(string rule, int optionsStartOffset, bool hasOptio
250253
var enumOptions = UrlFilter.UrlFilterOptions.None;
251254

252255
if(allOptions != null)
253-
{
254-
string domainsOption = null;//allOptions.Where(r => r.IndexOf('=') != -1).FirstOrDefault();
256+
{
257+
string domainsOption = null;
258+
259+
string refererOption = null;
260+
255261
var allOptLen = allOptions.Length;
256262
for(int i = 0; i < allOptLen; ++i)
257263
{
258-
if(allOptions[i].Length > 7 && allOptions[i][6] == '=')
264+
if(allOptions[i].Length > 7 && allOptions[i][0] == 'd' && allOptions[i][7] == '=')
259265
{
260266
domainsOption = allOptions[i];
261267
allOptions[i] = string.Empty;
262-
break;
268+
}
269+
270+
if(allOptions[i].Length > 7 && allOptions[i][0] == 'r' && allOptions[i][7] == '=')
271+
{
272+
refererOption = allOptions[i];
273+
allOptions[i] = string.Empty;
263274
}
264275
}
265276

@@ -268,13 +279,11 @@ private Filter ParseUrlFilter(string rule, int optionsStartOffset, bool hasOptio
268279
// If we got a domains option, split it out of the main options collection, as
269280
// this is the only type of option that's special, i.e. has to be parsed differently.
270281
// Differentiate simply by string length for speed, but do ordinal compare when
271-
// lengths are equal. allOptions = allOptions.Where(r => (r.Length !=
272-
// domainsOption.Length || !r.Equals(domainsOption,
273-
// StringComparison.OrdinalIgnoreCase))).ToArray();
282+
// lengths are equal.
274283

275284
// Trim off the "domains=" part, then split by the domains delimiter, which is a
276285
// pipe.
277-
domainsOption = domainsOption.Substring(7);
286+
domainsOption = domainsOption.Substring(8);
278287
var rawDomains = domainsOption.Split(s_domainsDelim, StringSplitOptions.None);
279288

280289
// Get applicable and exception domains. Exception domains in the list start with tilde,
@@ -301,6 +310,42 @@ private Filter ParseUrlFilter(string rule, int optionsStartOffset, bool hasOptio
301310
}
302311
}
303312

313+
if(refererOption != null)
314+
{
315+
// If we got a referers option, split it out of the main options collection, as
316+
// this is the only type of option that's special, i.e. has to be parsed differently.
317+
// Differentiate simply by string length for speed, but do ordinal compare when
318+
// lengths are equal.
319+
320+
// Trim off the "referer=" part, then split by the domains delimiter, which is a
321+
// pipe.
322+
refererOption = refererOption.Substring(8);
323+
var rawReferers = refererOption.Split(s_domainsDelim, StringSplitOptions.None);
324+
325+
// Get applicable and exception referers. Exception referers in the list start with tilde,
326+
// applicable referers don't. Applicable here meaning that the rule should apply to such
327+
// a referer.
328+
329+
var referersLen = rawReferers.Length;
330+
for(int i = 0; i < referersLen; ++i)
331+
{
332+
switch(rawReferers[i][0])
333+
{
334+
case '~':
335+
{
336+
exceptReferers.Add(rawReferers[i].Substring(1));
337+
}
338+
break;
339+
340+
default:
341+
{
342+
applicableReferers.Add(rawReferers[i]);
343+
}
344+
break;
345+
}
346+
}
347+
}
348+
304349
// Parse out the rest of the options.
305350
UrlFilter.UrlFilterOptions asOpt;
306351
foreach(var opt in allOptions)
@@ -452,7 +497,7 @@ private Filter ParseUrlFilter(string rule, int optionsStartOffset, bool hasOptio
452497
compiledParts.Add(new UrlFilter.StringLiteralFragment(rule.Substring(lastCol), enumOptions.HasFlag(UrlFilter.UrlFilterOptions.MatchCase)));
453498
}
454499

455-
return new UrlFilter(originalRuleCopy, compiledParts, enumOptions, applicableDomains, exceptionDomains, isException, categoryId);
500+
return new UrlFilter(originalRuleCopy, compiledParts, enumOptions, applicableDomains, exceptionDomains, applicableReferers, exceptReferers, isException, categoryId);
456501
}
457502
}
458503
}

DistillNET/DistillNET/DistillNET/FilterDbCollection.cs

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ namespace DistillNET
2323
/// rather than serialized/deserialized, because the parser is much faster than such utilities
2424
/// such as protobuf.
2525
/// </summary>
26-
public class FilterDbCollection
26+
public class FilterDbCollection : IDisposable
2727
{
2828
/// <summary>
2929
/// Our rule parser.
@@ -217,7 +217,7 @@ public async Task<Tuple<int, int>> ParseStoreRules(string[] rawRuleStrings, shor
217217
if(filter.ApplicableDomains.Count > 0)
218218
{
219219
foreach(var dmn in filter.ApplicableDomains)
220-
{
220+
{
221221
cmd.Parameters[0].Value = dmn;
222222
cmd.Parameters[1].Value = categoryId;
223223
cmd.Parameters[2].Value = filter.IsException;
@@ -428,5 +428,44 @@ public List<Filter> GetFiltersForRequest(Uri requestString, string referer = "")
428428
{
429429
return null;
430430
}
431+
432+
#region IDisposable Support
433+
private bool disposedValue = false; // To detect redundant calls
434+
435+
protected virtual void Dispose(bool disposing)
436+
{
437+
if(!disposedValue)
438+
{
439+
if(disposing)
440+
{
441+
if(m_connection != null)
442+
{
443+
m_connection.Close();
444+
m_connection = null;
445+
}
446+
}
447+
448+
// TODO: free unmanaged resources (unmanaged objects) and override a finalizer below.
449+
// TODO: set large fields to null.
450+
451+
disposedValue = true;
452+
}
453+
}
454+
455+
// TODO: override a finalizer only if Dispose(bool disposing) above has code to free unmanaged resources.
456+
// ~FilterDbCollection() {
457+
// // Do not change this code. Put cleanup code in Dispose(bool disposing) above.
458+
// Dispose(false);
459+
// }
460+
461+
// This code added to correctly implement the disposable pattern.
462+
public void Dispose()
463+
{
464+
// Do not change this code. Put cleanup code in Dispose(bool disposing) above.
465+
Dispose(true);
466+
// TODO: uncomment the following line if the finalizer is overridden above.
467+
// GC.SuppressFinalize(this);
468+
}
469+
#endregion
431470
}
432471
}

DistillNET/DistillNET/DistillNET/UrlFilter.cs

Lines changed: 73 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -460,21 +460,41 @@ public override int IsMatch(Uri source, int lastPosition)
460460
}
461461

462462
/// <summary>
463-
/// Gets an array of all domains that this URL filter rule applies to. In the event that this
463+
/// Gets a hashset of all referers that this URL filter rule applies to. In the event that
464+
/// this array is empty, the referer field on requests will not be checked.
465+
/// </summary>
466+
public HashSet<string> ApplicableReferers
467+
{
468+
get;
469+
private set;
470+
}
471+
472+
/// <summary>
473+
/// Gets a hashset of all referers that this URL filter rule should not be applied to. In the event that
474+
/// this array is empty, the referer field on requests will not be checked.
475+
/// </summary>
476+
public HashSet<string> ExceptReferers
477+
{
478+
get;
479+
private set;
480+
}
481+
482+
/// <summary>
483+
/// Gets a hashset of all domains that this URL filter rule applies to. In the event that this
464484
/// array is empty, the rule applies globally, to all domains.
465485
/// </summary>
466-
public List<string> ApplicableDomains
486+
public HashSet<string> ApplicableDomains
467487
{
468488
get;
469489
private set;
470490
}
471491

472492
/// <summary>
473-
/// Gets an array of all domains that this URL filter should not be applied to. In the event
493+
/// Gets a hashset of all domains that this URL filter should not be applied to. In the event
474494
/// that this array is empty, the rule applies either globally, or exclusively to the list of
475495
/// applicable domains, if that property is not empty.
476496
/// </summary>
477-
public List<string> ExceptionDomains
497+
public HashSet<string> ExceptionDomains
478498
{
479499
get;
480500
private set;
@@ -538,13 +558,16 @@ public List<UrlFilteringRuleFragment> Parts
538558
/// <param name="categoryId">
539559
/// The category ID of the category this filter belongs to.
540560
/// </param>
541-
internal UrlFilter(string originalRule, List<UrlFilteringRuleFragment> parts, UrlFilterOptions options, List<string> applicableDomains, List<string> exceptionDomains, bool isException, short categoryId) : base(originalRule, isException, categoryId)
561+
internal UrlFilter(string originalRule, List<UrlFilteringRuleFragment> parts, UrlFilterOptions options, HashSet<string> applicableDomains, HashSet<string> exceptionDomains, HashSet<string> applicableReferers, HashSet<string> exceptionReferers, bool isException, short categoryId) : base(originalRule, isException, categoryId)
542562
{
543563
Parts = parts;
544564
Options = options;
545565

546566
ApplicableDomains = applicableDomains;
547567
ExceptionDomains = exceptionDomains;
568+
569+
ApplicableReferers = applicableReferers;
570+
ExceptReferers = exceptionReferers;
548571
}
549572

550573
/// <summary>
@@ -564,23 +587,23 @@ public bool IsMatch(Uri uri, NameValueCollection rawHeaders)
564587
{
565588
// Make sure that the headers match up with our options.
566589
if(this.Options != UrlFilterOptions.None)
567-
{
590+
{
568591
string headerVal = null;
569592
long xmlHttpRequestBits = ((OptionsLong & (long)UrlFilterOptions.ExceptXmlHttpRequest) | (OptionsLong & (long)UrlFilterOptions.XmlHttpRequest));
570593
if((headerVal = rawHeaders.Get("X-Requested-With")) != null)
571594
{
572595
if(headerVal.Equals("XMLHttpRequest", StringComparison.OrdinalIgnoreCase))
573-
{
596+
{
574597
xmlHttpRequestBits &= ~(long)UrlFilterOptions.XmlHttpRequest;
575598
}
576599
else
577-
{
600+
{
578601
xmlHttpRequestBits &= ~(long)UrlFilterOptions.ExceptXmlHttpRequest;
579602
}
580603
}
581604

582605
if(xmlHttpRequestBits != 0)
583-
{
606+
{
584607
// XML HttpRequest bits were not cleared, meaning that one of those options was not satisfied.
585608
return false;
586609
}
@@ -596,6 +619,21 @@ public bool IsMatch(Uri uri, NameValueCollection rawHeaders)
596619
{
597620
thirdPartyBits &= ~(long)UrlFilterOptions.ThirdParty;
598621
}
622+
623+
// While we have the referer field, let's go ahead and check if we have referer
624+
// options and if we do or don't have a match.
625+
//
626+
// This is a shortcut. We unfortunately need to execute this code also when
627+
// there are no options.
628+
if(ApplicableReferers.Count > 0 && !ApplicableReferers.Contains(headerVal))
629+
{
630+
return false;
631+
}
632+
633+
if(ExceptReferers.Count > 0 && ExceptReferers.Contains(headerVal))
634+
{
635+
return false;
636+
}
599637
}
600638
else
601639
{
@@ -617,7 +655,7 @@ public bool IsMatch(Uri uri, NameValueCollection rawHeaders)
617655
if((headerVal = rawHeaders.Get("Content-Type")) != null)
618656
{
619657
if(headerVal.IndexOfQuick("script") != -1)
620-
{
658+
{
621659
contentTypeBits &= ~(long)UrlFilterOptions.Script;
622660
}
623661
else
@@ -650,7 +688,31 @@ public bool IsMatch(Uri uri, NameValueCollection rawHeaders)
650688
return false;
651689
}
652690
}
653-
691+
else
692+
{
693+
if(ApplicableReferers.Count > 0 || ExceptReferers.Count > 0)
694+
{
695+
string headerVal = null;
696+
if((headerVal = rawHeaders.Get("Referer")) != null)
697+
{
698+
// While we have the referer field, let's go ahead and check if we have referer
699+
// options and if we do or don't have a match.
700+
//
701+
// This is a shortcut. We unfortunately need to execute this code also when
702+
// there are no options.
703+
if(ApplicableReferers.Count > 0 && !ApplicableReferers.Contains(headerVal))
704+
{
705+
return false;
706+
}
707+
708+
if(ExceptReferers.Count > 0 && ExceptReferers.Contains(headerVal))
709+
{
710+
return false;
711+
}
712+
}
713+
}
714+
}
715+
654716
int matchIndex = 0;
655717
foreach(var part in Parts)
656718
{

DistillNET/DistillNET/Properties/AssemblyInfo.cs

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,10 @@
66
*/
77

88
using System.Reflection;
9-
using System.Runtime.CompilerServices;
109
using System.Runtime.InteropServices;
1110

12-
// General Information about an assembly is controlled through the following
13-
// set of attributes. Change these attribute values to modify the information
14-
// associated with an assembly.
11+
// General Information about an assembly is controlled through the following set of attributes.
12+
// Change these attribute values to modify the information associated with an assembly.
1513
[assembly: AssemblyTitle("DistillNET")]
1614
[assembly: AssemblyDescription("A library for matching and filtering HTTP requests and HTML response content using the Adblock Plus Filter format.")]
1715
[assembly: AssemblyConfiguration("")]
@@ -21,23 +19,18 @@
2119
[assembly: AssemblyTrademark("")]
2220
[assembly: AssemblyCulture("")]
2321

24-
// Setting ComVisible to false makes the types in this assembly not visible
25-
// to COM components. If you need to access a type in this assembly from
26-
// COM, set the ComVisible attribute to true on that type.
22+
// Setting ComVisible to false makes the types in this assembly not visible to COM components. If you
23+
// need to access a type in this assembly from COM, set the ComVisible attribute to true on that type.
2724
[assembly: ComVisible(false)]
2825

2926
// The following GUID is for the ID of the typelib if this project is exposed to COM
3027
[assembly: Guid("66aefe15-e2ac-4ce1-a83e-4e3e3ca2f9de")]
3128

3229
// Version information for an assembly consists of the following four values:
3330
//
34-
// Major Version
35-
// Minor Version
36-
// Build Number
37-
// Revision
31+
// Major Version Minor Version Build Number Revision
3832
//
39-
// You can specify all the values or you can default the Build and Revision Numbers
40-
// by using the '*' as shown below:
41-
// [assembly: AssemblyVersion("1.0.*")]
42-
[assembly: AssemblyVersion("1.1.5.0")]
43-
[assembly: AssemblyFileVersion("1.1.5.0")]
33+
// You can specify all the values or you can default the Build and Revision Numbers by using the '*'
34+
// as shown below: [assembly: AssemblyVersion("1.0.*")]
35+
[assembly: AssemblyVersion("1.3.1.0")]
36+
[assembly: AssemblyFileVersion("1.3.1.0")]

0 commit comments

Comments
 (0)