feat(plugin): add clearURLs (#47)

This commit is contained in:
Ariana
2022-10-05 10:37:49 -04:00
committed by GitHub
parent 3e0355cb53
commit 45644dec43
2 changed files with 268 additions and 0 deletions

View File

@ -0,0 +1,134 @@
/**
 * Default list of tracking query parameters that the clearURLs plugin strips
 * from URLs. Each entry is either `param` or `param@host`.
 *
 * Global parameter (no `@host` part, applies to every host)
 * `utm_source`
 *
 * Parameter restricted to a domain
 * `feature@youtube.com`
 *
 * Domain wildcards
 * `tag@amazon.*`
 *
 * Parameter wildcards
 * `utm_*`
 */
export const defaultRules = [
"action_object_map",
"action_type_map",
"action_ref_map",
"spm@*.aliexpress.com",
"scm@*.aliexpress.com",
"aff_platform",
"aff_trace_key",
"algo_expid@*.aliexpress.*",
"algo_pvid@*.aliexpress.*",
"btsid",
"ws_ab_test",
"pd_rd_*@amazon.*",
"_encoding@amazon.*",
"psc@amazon.*",
"tag@amazon.*",
"ref_@amazon.*",
"pf_rd_*@amazon.*",
"pf@amazon.*",
"crid@amazon.*",
"keywords@amazon.*",
"sprefix@amazon.*",
"sr@amazon.*",
"ie@amazon.*",
"node@amazon.*",
"qid@amazon.*",
"callback@bilibili.com",
"cvid@bing.com",
"form@bing.com",
"sk@bing.com",
"sp@bing.com",
"sc@bing.com",
"qs@bing.com",
"pq@bing.com",
"sc_cid",
"mkt_tok",
"trk",
"trkCampaign",
"ga_*",
"gclid",
"gclsrc",
"hmb_campaign",
"hmb_medium",
"hmb_source",
"spReportId",
"spJobID",
"spUserID",
"spMailingID",
"itm_*",
"s_cid",
"elqTrackId",
"elqTrack",
"assetType",
"assetId",
"recipientId",
"campaignId",
"siteId",
"mc_cid",
"mc_eid",
"pk_*",
"sc_campaign",
"sc_channel",
"sc_content",
"sc_medium",
"sc_outcome",
"sc_geo",
"sc_country",
"nr_email_referer",
"vero_conv",
"vero_id",
"yclid",
"_openstat",
"mbid",
"cmpid",
"cid",
"c_id",
"campaign_id",
"Campaign",
"hash@ebay.*",
"fb_action_ids",
"fb_action_types",
"fb_ref",
"fb_source",
"fbclid",
"refsrc@facebook.com",
"hrc@facebook.com",
"gs_l",
"gs_lcp@google.*",
"ved@google.*",
"ei@google.*",
"sei@google.*",
"gws_rd@google.*",
"gs_gbg@google.*",
"gs_mss@google.*",
"gs_rn@google.*",
"_hsenc",
"_hsmi",
"__hssc",
"__hstc",
"hsCtaTracking",
"source@sourceforge.net",
"position@sourceforge.net",
"t@*.twitter.com",
"s@*.twitter.com",
"ref_*@*.twitter.com",
"tt_medium",
"tt_content",
"lr@yandex.*",
"redircnt@yandex.*",
"feature@youtube.com",
"kw@youtube.com",
"wt_zmc",
"utm_source",
"utm_content",
"utm_medium",
"utm_campaign",
"utm_term",
"si@open.spotify.com",
];

View File

@ -0,0 +1,134 @@
import { defaultRules } from "./defaultRules";
import {
addPreSendListener,
addPreEditListener,
MessageObject,
removePreSendListener,
removePreEditListener,
} from "../../api/MessageEvents";
import definePlugin from "../../utils/types";
// From lodash: characters that have a special meaning inside a regular expression.
const reRegExpChar = /[\\^$.*+?()[\]{}|]/g;
// Non-global twin of the above, used only for a cheap "needs escaping?" test.
const reHasRegExpChar = RegExp(reRegExpChar.source);

/**
 * Plugin that strips tracking query parameters from URLs found in messages
 * that are about to be sent or edited.
 *
 * State created by `createRules()` and stored on the plugin object:
 * - `universalRules`: Set of RegExp matched against parameter names on any host.
 * - `hostRules`: Map from a host RegExp's string form to that RegExp.
 * - `rulesByHost`: Map from the same string key to the Set of parameter
 *   RegExps that apply when the host RegExp matches.
 */
export default definePlugin({
    name: "clearURLs",
    description: "Removes tracking garbage from URLs",
    authors: [
        {
            name: "adryd",
            id: 0n,
        },
    ],
    dependencies: ["MessageEventsAPI"],

    /**
     * Escapes every regular-expression metacharacter in `str`.
     *
     * @param str Text to be embedded literally in a RegExp source.
     * @returns The escaped text, or `""` when `str` is empty/nullish.
     */
    escapeRegExp(str: string) {
        return (str && reHasRegExpChar.test(str))
            ? str.replace(reRegExpChar, "\\$&")
            : (str || "");
    },

    /**
     * Compiles the rule strings (`param` or `param@host`) into regular
     * expressions and (re)builds `universalRules`, `hostRules` and `rulesByHost`.
     */
    createRules() {
        // Can be extended upon once user configs are available
        // Eg. (useDefaultRules: boolean, customRules: Array[string])
        const rules = defaultRules;

        this.universalRules = new Set();
        this.rulesByHost = new Map();
        this.hostRules = new Map();

        for (const rule of rules) {
            const [paramPattern, hostPattern] = rule.split("@");

            // Every `*` (escaped to `\*` above) is a wildcard; the global flag
            // makes sure rules with more than one `*` are fully translated.
            const paramRule = new RegExp(
                "^" +
                this.escapeRegExp(paramPattern).replace(/\\\*/g, ".+?") +
                "$"
            );

            // No host part: the parameter is stripped on every host.
            if (!hostPattern) {
                this.universalRules.add(paramRule);
                continue;
            }

            const hostRule = new RegExp(
                "^(www\\.)?" +
                this.escapeRegExp(hostPattern)
                    // A leading `*.` means "any subdomain, or none at all"
                    .replace(/^\\\*\\\./, "(.+?\\.)?")
                    // Any other `*` must match at least one character
                    .replace(/\\\*/g, ".+?") +
                "$"
            );

            // The RegExp's string form is the key shared by both maps so that
            // several parameter rules for the same host pattern are grouped.
            const hostRuleIndex = hostRule.toString();
            this.hostRules.set(hostRuleIndex, hostRule);

            let paramRules = this.rulesByHost.get(hostRuleIndex);
            if (paramRules == null) {
                paramRules = new Set();
                this.rulesByHost.set(hostRuleIndex, paramRules);
            }
            paramRules.add(paramRule);
        }
    },

    /**
     * Deletes `param` from `parent` when it equals `rule` (string) or matches
     * it (RegExp).
     *
     * @param rule Exact parameter name or a RegExp tested against the name.
     * @param param Name of the query parameter under consideration.
     * @param parent Search params to delete from.
     * @returns `true` when the parameter was deleted, `false` otherwise.
     */
    removeParam(rule: string | RegExp, param: string, parent: URLSearchParams) {
        if (param === rule || (rule instanceof RegExp && rule.test(param))) {
            parent.delete(param);
            return true;
        }
        return false;
    },

    /**
     * Strips every parameter matched by the compiled rules from one URL.
     *
     * @param match Candidate URL text taken from a message.
     * @returns The cleaned URL, or `match` unchanged when it cannot be parsed,
     * has no query parameters, or none of its parameters matched a rule.
     */
    replacer(match: string) {
        // Parse URL without throwing errors
        let url: URL;
        try {
            url = new URL(match);
        } catch {
            // Don't modify anything if we can't parse the URL
            return match;
        }

        const params = url.searchParams;
        const host = url.hostname;

        // Cheap way to check if there are any search params
        if (params.entries().next().done) {
            // If there are none, we don't need to modify anything
            return match;
        }

        let removedCount = 0;
        const strip = (rules: Iterable<RegExp>) => {
            // Snapshot the names first: URLSearchParams iteration is live, so
            // deleting the current entry while iterating skips the next one.
            for (const param of new Set(params.keys())) {
                for (const rule of rules) {
                    if (this.removeParam(rule, param, params)) {
                        removedCount++;
                        break;
                    }
                }
            }
        };

        // Check all universal rules
        strip(this.universalRules);

        // Check rules for each hosts that match
        this.hostRules.forEach((regex: RegExp, hostRuleName: string) => {
            if (regex.test(host)) {
                strip(this.rulesByHost.get(hostRuleName));
            }
        });

        // Re-serialising would normalise the URL even when nothing was
        // stripped, so hand back the author's original text in that case.
        return removedCount > 0 ? url.toString() : match;
    },

    /**
     * Rewrites `msg.content` in place, cleaning every http(s) URL it contains.
     *
     * @param msg Message about to be sent or edited.
     */
    onSend(msg: MessageObject) {
        // Only run on messages that contain URLs
        if (/https?:\/\//.test(msg.content)) {
            msg.content = msg.content.replace(
                // The final character class keeps trailing punctuation out of the URL
                /(https?:\/\/[^\s<]+[^<.,:;"'>)|\]\s])/g,
                match => this.replacer(match)
            );
        }
    },

    /** Compiles the rules and registers the pre-send and pre-edit listeners. */
    start() {
        this.createRules();
        this.preSend = addPreSendListener((_, msg) => this.onSend(msg));
        this.preEdit = addPreEditListener((_cid, _mid, msg) =>
            this.onSend(msg)
        );
    },

    /** Unregisters the listeners installed by `start()`. */
    stop() {
        removePreSendListener(this.preSend);
        removePreEditListener(this.preEdit);
    },
});