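// Email Worker: automatically extracts verification codes (OTP) from incoming mail and pushes both the code and the original message content to a Bark server.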
// ======== Configuration (this is the only part you need to edit) ========
// Bark device key; it becomes the first path segment of every push URL.
const BARK_KEY = ""; // <-- fill this in
// Base URL of the Bark server, without a trailing slash (a "/" is appended below).
// No need to change it unless you run a self-hosted Bark server.
// NOTE(review): the value is empty here - it must hold a server URL for pushes to work.
const BARK_SERVER = "";
const BARK_PUSH_URL = `${BARK_SERVER}/${BARK_KEY}`;
// Query string for the OTP push; the extracted code is appended directly after "code=".
// The "&copy" parameter had been mangled into a "©" character by HTML-entity decoding.
// NOTE(review): Bark documents `copy` as the text to copy and `autoCopy=1` as the
// auto-copy switch; `copy=1&code=<code>` may not copy the code itself - confirm.
const BARK_ARG_OTP = "group=OTP&level=timeSensitive&copy=1&code=";
// Query string for the full-message push.
const BARK_ARG_FULL = "group=MAIL&level=passive";
// =========================================================================
// ---------- Helper functions ----------
// Decodes a Quoted-Printable string (used for message bodies).
//
// - Soft line breaks ("=" at the end of a line, CRLF or bare LF, optionally
//   followed by padding spaces/tabs) are removed.
// - Each run of "=XX" escapes is treated as a byte sequence and decoded as UTF-8,
//   so multi-byte characters survive. If a run is not valid UTF-8 (for example
//   Latin-1 content), every byte is mapped to the character with the same code,
//   which is what the previous implementation did for all input.
//
// Falsy input (null, undefined, "") is returned unchanged.
function decodeQuotedPrintable(str) {
  if (!str) return str;
  const unfolded = str.replace(/=[ \t]*\r?\n/g, '');
  return unfolded.replace(/(?:=[A-Fa-f0-9]{2})+/g, (run) => {
    // "=E4=BD=A0" -> ["E4", "BD", "A0"] -> bytes
    const bytes = Uint8Array.from(run.slice(1).split('='), (hex) => parseInt(hex, 16));
    try {
      // fatal: true makes invalid UTF-8 throw instead of yielding U+FFFD.
      return new TextDecoder('utf-8', { fatal: true }).decode(bytes);
    } catch {
      // Not UTF-8: fall back to a byte-per-character mapping.
      return Array.from(bytes, (byte) => String.fromCharCode(byte)).join('');
    }
  });
}
// Decodes a Base64 string into text, interpreting the decoded bytes as UTF-8.
// Line breaks inside the Base64 payload are dropped before decoding.
// Throws whatever atob throws when the payload is not valid Base64.
function decodeBase64Utf8(base64Str) {
  const singleLine = base64Str.replace(/\r?\n/g, '');
  // atob yields one character per byte; copy those codes into a byte array.
  const byteString = atob(singleLine);
  const byteArray = new Uint8Array(byteString.length);
  for (let i = 0; i < byteString.length; i += 1) {
    byteArray[i] = byteString.charCodeAt(i);
  }
  const utf8Decoder = new TextDecoder('utf-8');
  return utf8Decoder.decode(byteArray);
}
// Converts an HTML fragment into readable plain text.
//
// Steps: drop comments, <style> and <script> blocks; turn <br> and common
// block-level tags into newlines; strip every remaining tag; decode numeric
// entities and a small set of named entities; then tidy whitespace while
// keeping paragraph breaks. Returns '' for falsy input.
function htmlToCleanText(html) {
  if (!html) return '';
  const namedEntities = { nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };

  // Remove comments, style and script blocks.
  let text = html.replace(/<!--[\s\S]*?-->/g, ' ');
  text = text.replace(/<style[\s\S]*?>[\s\S]*?<\/style>/gi, ' ');
  text = text.replace(/<script[\s\S]*?>[\s\S]*?<\/script>/gi, ' ');
  // <br>, <br/> and <br /> become newlines.
  text = text.replace(/<br\s*\/?>/gi, '\n');
  // Closing and opening block-like tags become newlines.
  text = text.replace(/<\/(p|div|tr|li|h[1-6]|table|tbody|thead|tfoot)>/gi, '\n');
  text = text.replace(/<(p|div|tr|li|h[1-6]|table|tbody|thead|tfoot)[^>]*>/gi, '\n');
  // Any other tag is replaced by a space.
  text = text.replace(/<[^>]+>/g, ' ');

  // Decode entities in ONE pass so that "&amp;lt;" ends up as "&lt;" and is not
  // decoded twice. Numeric entities may omit the trailing ";", named ones may not.
  text = text.replace(
    /&#x([0-9A-Fa-f]+);?|&#([0-9]+);?|&(nbsp|amp|lt|gt|quot|apos);/g,
    (entity, hex, dec, name) => {
      if (name) return namedEntities[name];
      const codePoint = parseInt(hex || dec, hex ? 16 : 10);
      // fromCodePoint handles characters above U+FFFF (e.g. emoji); values outside
      // the Unicode range would make it throw, so those entities are left as-is.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    },
  );

  // Collapse repeated whitespace but keep newlines.
  text = text.replace(/\r\n/g, '\n');
  text = text.replace(/[ \t]+\n/g, '\n');
  text = text.replace(/\n{3,}/g, '\n\n');
  text = text.replace(/[ \t]{2,}/g, ' ');
  return text.trim();
}
// Rewrites <a href="..."> elements as plain text before the HTML is stripped,
// so that link targets (for example button links) are not lost.
// An anchor with visible text becomes "text (url)"; one without becomes "url".
// Falsy input is returned unchanged.
function preserveLinksForPlainText(html) {
  if (!html) return html;
  const anchorPattern = /<a\s+[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi;
  const renderAnchor = (_wholeTag, url, innerHtml) => {
    // Strip nested markup so only the visible label remains.
    const visibleText = innerHtml.replace(/<[^>]+>/g, '').trim();
    if (visibleText) {
      return `${visibleText} (${url})`;
    }
    return url;
  };
  return html.replace(anchorPattern, renderAnchor);
}
// Heuristic: does this text look like a large Base64 payload?
// It must be at least 40 characters long once line breaks are removed, consist
// only of Base64 alphabet characters, "=" and whitespace, and end with one or
// two "=" padding characters.
function looksLikeBase64(s) {
  if (!s) return false;
  const compact = s.replace(/\r?\n/g, '').trim();
  // Short strings are never treated as Base64.
  if (compact.length < 40) return false;
  const usesOnlyBase64Chars = /^[A-Za-z0-9+/=\s]+$/.test(s);
  if (!usesOnlyBase64Chars) return false;
  const endsWithPadding = /={1,2}$/.test(compact);
  return endsWithPadding;
}
// ---------- Parsing multipart messages / parts ----------
// Extracts decoded, cleaned text from a single MIME part.
//
// `part` is the text of one part: its header lines, a blank line, then the body.
// Returns { type, text } where type is 'text' (text/plain), 'html' (text/html,
// already converted to plain text) or 'unknown' (anything else, body returned
// decoded but otherwise raw). Returns null for empty input.
//
// Encoding is taken from the part's own Content-Transfer-Encoding header when it
// has one; the body-sniffing heuristics are used only when no header is present,
// so that 7bit/8bit text containing "=ab" (URLs, for instance) is not corrupted.
function parsePartText(part) {
  if (!part) return null;

  // Headers end at the first blank line; accept CRLF as well as bare LF.
  const separator = /\r?\n\r?\n/.exec(part);
  const headers = separator ? part.slice(0, separator.index) : part;
  let body = separator ? part.slice(separator.index + separator[0].length) : part;
  body = body.replace(/\r?\n$/, '');

  // The value stops at ";" or end of line so it can never run into the body.
  const typeMatch = headers.match(/^Content-Type:\s*([^;\r\n]+)/im);
  const ctype = typeMatch ? typeMatch[1].toLowerCase().trim() : null;

  const encodingMatch = headers.match(/^Content-Transfer-Encoding:\s*([^\s;]+)/im);
  const declaredEncoding = encodingMatch ? encodingMatch[1].toLowerCase() : null;
  const isBase64 = declaredEncoding ? declaredEncoding === 'base64' : looksLikeBase64(body);
  const isQP = declaredEncoding
    ? declaredEncoding === 'quoted-printable'
    : /=([0-9A-F]{2})/i.test(body);

  try {
    if (isBase64) {
      body = decodeBase64Utf8(body);
    } else if (isQP) {
      body = decodeQuotedPrintable(body);
    }
  } catch (e) {
    // Best effort: keep the undecoded body if decoding fails.
    console.warn('decode failed for part', e);
  }

  if (ctype && /text\/plain/i.test(ctype)) {
    // Normalize CRLF to LF and keep paragraph breaks.
    body = body.replace(/\r\n/g, '\n');
    body = body.replace(/\n{3,}/g, '\n\n');
    body = body.trim();
    return { type: 'text', text: body };
  }

  if (ctype && /text\/html/i.test(ctype)) {
    return { type: 'html', text: htmlToCleanText(body) };
  }

  // Neither plain text nor HTML: hand back the decoded body, marked as unknown.
  body = body.replace(/\r\n/g, '\n').trim();
  return { type: 'unknown', text: body };
}
// Splits a raw message on every MIME boundary it declares (nested multipart
// containers included), returning the pieces between delimiters. Returns null
// when the message declares no boundary.
function splitRawIntoMimeChunks(raw) {
  const declared = [...raw.matchAll(/boundary="?([^";\r\n]+)"?/gi)]
    .map((m) => m[1].trim())
    .filter(Boolean);
  if (declared.length === 0) return null;
  const escapeForRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Longest first, so a boundary that is a prefix of another cannot win the match.
  const delimiters = [...new Set(declared)]
    .sort((a, b) => b.length - a.length)
    .map((boundary) => `--${escapeForRegExp(boundary)}`);
  return raw
    .split(new RegExp(delimiters.join('|')))
    // Drop empty pieces and the "--" left over from closing delimiters.
    .filter((chunk) => chunk && !/^\s*--/.test(chunk));
}

// Decodes the body of a non-multipart message: Base64 when the body looks like
// Base64 or the message declares it, Quoted-Printable when the message declares it.
function decodeSinglePartBody(body, raw) {
  if (looksLikeBase64(body) || /Content-Transfer-Encoding:\s*base64/i.test(raw)) {
    try {
      return decodeBase64Utf8(body);
    } catch (e) {
      console.warn('base64 decode failed, keeping raw body', e);
      return body;
    }
  }
  if (/Content-Transfer-Encoding:\s*quoted-printable/i.test(raw)) {
    return decodeQuotedPrintable(body);
  }
  return body;
}

// Collects candidate text bodies from a raw MIME message.
//
// Multipart messages: every part is parsed once; all text/plain bodies are
// returned if there are any, otherwise all (cleaned) text/html bodies, otherwise
// an empty array. Parts inside nested multipart containers are included.
// Other messages: the text/plain body, else the cleaned text/html body, else
// the whole body after the headers (decoded heuristically and cleaned).
// Always returns an array of strings (possibly empty).
function collectCandidateParts(raw) {
  if (!raw) return [];

  const chunks = splitRawIntoMimeChunks(raw);
  if (chunks) {
    const plainTexts = [];
    const htmlTexts = [];
    for (const chunk of chunks) {
      const parsed = parsePartText(chunk);
      if (!parsed) continue;
      if (parsed.type === 'text') plainTexts.push(parsed.text);
      else if (parsed.type === 'html') htmlTexts.push(parsed.text);
    }
    // Prefer text/plain; fall back to cleaned HTML.
    return plainTexts.length > 0 ? plainTexts : htmlTexts;
  }

  // No boundary: look for a text/plain or text/html block directly.
  const plainMatch = raw.match(/Content-Type:\s*text\/plain[\s\S]*?\r\n\r\n([\s\S]+)/i);
  if (plainMatch) {
    const body = decodeSinglePartBody(plainMatch[1], raw);
    return [body.replace(/\r\n/g, '\n').trim()];
  }
  const htmlMatch = raw.match(/Content-Type:\s*text\/html[\s\S]*?\r\n\r\n([\s\S]+)/i);
  if (htmlMatch) {
    return [htmlToCleanText(decodeSinglePartBody(htmlMatch[1], raw))];
  }

  // Final fallback: everything after the first blank line.
  const wholeBody = raw.split(/\r\n\r\n/).slice(1).join('\r\n\r\n').trim();
  if (!wholeBody) return [];
  let body = wholeBody;
  if (looksLikeBase64(body)) {
    try {
      body = decodeBase64Utf8(body);
    } catch (e) {
      console.warn('base64 decode failed, keeping raw body', e);
    }
  } else if (/=([0-9A-F]{2})/i.test(body)) {
    body = decodeQuotedPrintable(body);
  }
  body = body.replace(/\r\n/g, '\n').trim();
  // The body may still be HTML; strip style/script/tags if present.
  return [htmlToCleanText(body)];
}
// ---------- Verification-code extraction ----------
// Extracts the most likely verification code from a list of text candidates.
//
// Passes, in priority order (each pass scans every candidate before the next runs):
//   1. a code-like token within 50 characters after a keyword ("验证码",
//      "verification code", "OTP", ...). Code-like means it contains a digit or
//      has no lowercase letters, so ordinary words between the keyword and the
//      code ("for GitHub is 123456") are skipped;
//   2. a standalone run of 4-12 digits;
//   3. an alphanumeric token (up to four hyphens) that itself contains a digit
//      and has 4-20 alphanumeric characters;
//   4. a token with two or more hyphens, 4-20 alphanumeric characters, with a digit;
//   5. legacy fallback: the first 4-21 character token after a keyword, whatever
//      it looks like (kept so letter-only codes are still found as a last resort).
// Tokens that look like CSS are rejected in passes 1, 3, 4 and 5.
// Returns the code as a string, or null when nothing matches or `candidates`
// is not an array. Non-string and empty candidates are ignored.
function extractCodeFromCandidates(candidates) {
  if (!Array.isArray(candidates)) return null;
  const texts = candidates.filter((c) => typeof c === 'string' && c.length > 0);

  // Common CSS words, to avoid accidental matches.
  const cssWords = ['margin', 'padding', 'width', 'height', 'color', 'border', 'background', 'font', 'display', 'table', 'solid', 'px'];
  const codeKeywords = [
    "验证码", "校验码", "动态码", "一次性密码", "一次性代码",
    "验证代码", "安全码", "认证码", "提取码",
    "verification code", "verify code",
    "one-time password", "one time password",
    "security code", "passcode", "OTP"
  ];
  const keywordPattern = codeKeywords.join("|");
  const keywordScanRegex = new RegExp(keywordPattern, 'gi');
  const legacyKeywordRegex = new RegExp(`(?:${keywordPattern})[\\s\\S]{0,50}?([0-9A-Za-z][-0-9A-Za-z]{3,20})`, 'i');
  const digitsRegex = /\b(\d{4,12})\b/;
  const mixedRegex = /\b([0-9A-Za-z]+(?:-[0-9A-Za-z]+){0,4})\b/g;
  const multiHyphenRegex = /\b([0-9A-Za-z]+(?:-[0-9A-Za-z]+){2,})\b/;

  // Filters out tokens that are obviously CSS.
  function looksLikeCssToken(tok) {
    if (!tok) return false;
    const low = tok.toLowerCase();
    if (/\b(?:[a-z\-]+:|;|\{|\})/.test(tok)) return true; // contains CSS punctuation
    for (const w of cssWords) if (low.includes(w)) return true;
    // CSS function such as rgb( or url(
    if (/[A-Za-z\-]+\(.*\)/.test(tok)) return true;
    return false;
  }

  const hasCodeShape = (tok) => /\d/.test(tok) || !/[a-z]/.test(tok);
  const alnumLength = (tok) => tok.replace(/-/g, '').length;

  // Pass 1 helper: first code-like token that starts within 50 chars after a keyword.
  function findCodeAfterKeyword(text) {
    for (const keyword of text.matchAll(keywordScanRegex)) {
      const windowStart = keyword.index + keyword[0].length;
      const runRegex = /[0-9A-Za-z][-0-9A-Za-z]*/g;
      runRegex.lastIndex = windowStart;
      let run = runRegex.exec(text);
      while (run !== null && run.index - windowStart <= 50) {
        const token = run[0].replace(/-+$/, ''); // a code never ends with "-"
        if (token.length >= 4 && token.length <= 21 && hasCodeShape(token) && !looksLikeCssToken(token)) {
          return token;
        }
        run = runRegex.exec(text);
      }
    }
    return null;
  }

  // 1) keyword scan
  for (const text of texts) {
    const found = findCodeAfterKeyword(text);
    if (found) return found;
  }
  // 2) digits scan
  for (const text of texts) {
    const m = text.match(digitsRegex);
    if (m) return m[1];
  }
  // 3) mixed token; the digit must be in the token itself, not merely nearby
  for (const text of texts) {
    for (const m of text.matchAll(mixedRegex)) {
      const cand = m[1];
      const size = alnumLength(cand);
      if (/\d/.test(cand) && size >= 4 && size <= 20 && !looksLikeCssToken(cand)) return cand;
    }
  }
  // 4) multi-hyphen fallback (strict length after removing hyphens)
  for (const text of texts) {
    const m = text.match(multiHyphenRegex);
    if (m) {
      const cand = m[1];
      const size = alnumLength(cand);
      if (size >= 4 && size <= 20 && /\d/.test(cand) && !looksLikeCssToken(cand)) return cand;
    }
  }
  // 5) legacy keyword fallback: accept any token that follows a keyword
  for (const text of texts) {
    const m = text.match(legacyKeywordRegex);
    if (m && !looksLikeCssToken(m[1])) return m[1];
  }
  return null;
}
// ---------- Worker entry point ----------

// Returns everything after the first blank line of the raw message, trimmed.
function messageBodyAfterHeaders(raw) {
  return raw.split(/\r\n\r\n/).slice(1).join('\r\n\r\n').trim();
}

// Best-effort decoding for a body whose encoding is not known from headers:
// Base64 when it looks like Base64, otherwise Quoted-Printable when it contains
// "=XX" escapes. A failed Base64 decode is logged and the body is kept as-is.
function decodeBodyByHeuristic(body) {
  if (looksLikeBase64(body)) {
    try {
      return decodeBase64Utf8(body);
    } catch (err) {
      console.warn('base64 decode failed, keeping raw body', err);
      return body;
    }
  }
  if (/=([0-9A-F]{2})/i.test(body)) return decodeQuotedPrintable(body);
  return body;
}

// Splits a raw message on every MIME boundary it declares (nested multipart
// containers included). Returns null when no boundary is declared.
function splitOnMimeBoundaries(raw) {
  const declared = [...raw.matchAll(/boundary="?([^";\r\n]+)"?/gi)]
    .map((m) => m[1].trim())
    .filter(Boolean);
  if (declared.length === 0) return null;
  const escapeForRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Longest first, so a boundary that is a prefix of another cannot win the match.
  const delimiters = [...new Set(declared)]
    .sort((a, b) => b.length - a.length)
    .map((boundary) => `--${escapeForRegExp(boundary)}`);
  return raw
    .split(new RegExp(delimiters.join('|')))
    // Drop empty pieces and the "--" left over from closing delimiters.
    .filter((chunk) => chunk && !/^\s*--/.test(chunk));
}

// Lower-cased Content-Type value of a MIME part ('' when it has none).
function partContentType(chunk) {
  const m = chunk.match(/Content-Type:\s*([^;\r\n]+)/i);
  return m ? m[1].trim().toLowerCase() : '';
}

// Decodes and cleans one text/html part, keeping link targets visible.
function htmlPartToText(chunk) {
  const headerEnd = chunk.search(/\r\n\r\n/);
  const headers = headerEnd >= 0 ? chunk.slice(0, headerEnd) : chunk;
  let body = headerEnd >= 0 ? chunk.slice(headerEnd + 4) : chunk;
  // Only the part's headers decide the encoding; the body is not consulted.
  if (/Content-Transfer-Encoding:\s*base64/i.test(headers)) {
    try {
      body = decodeBase64Utf8(body);
    } catch (err) {
      console.warn('base64 decode failed for html part', err);
    }
  } else if (/Content-Transfer-Encoding:\s*quoted-printable/i.test(headers)) {
    body = decodeQuotedPrintable(body);
  }
  // Turn <a> elements into "label (url)" before the tags are stripped.
  return htmlToCleanText(preserveLinksForPlainText(body));
}

// Builds the readable text of the message for the "original content" push:
// text/plain parts if there are non-empty ones (line breaks kept), otherwise
// cleaned text/html parts, otherwise the cleaned whole body.
function extractOriginalText(raw) {
  const chunks = splitOnMimeBoundaries(raw);
  if (chunks) {
    const plainParts = [];
    for (const chunk of chunks) {
      if (!partContentType(chunk).includes('text/plain')) continue;
      const parsed = parsePartText(chunk);
      // Empty parts are skipped so they cannot suppress the HTML fallback.
      if (parsed && parsed.text) plainParts.push(parsed.text);
    }
    if (plainParts.length > 0) return plainParts.join('\n\n');

    const htmlParts = [];
    for (const chunk of chunks) {
      if (!partContentType(chunk).includes('text/html')) continue;
      const cleaned = htmlPartToText(chunk);
      if (cleaned) htmlParts.push(cleaned);
    }
    return htmlParts.join('\n\n');
  }

  // No boundary: try a text/plain block, then text/html, then the whole body.
  const plainMatch = raw.match(/Content-Type:\s*text\/plain[\s\S]*?\r\n\r\n([\s\S]+)/i);
  if (plainMatch) {
    return decodeBodyByHeuristic(plainMatch[1]).replace(/\r\n/g, '\n').trim();
  }
  const htmlMatch = raw.match(/Content-Type:\s*text\/html[\s\S]*?\r\n\r\n([\s\S]+)/i);
  if (htmlMatch) {
    return htmlToCleanText(preserveLinksForPlainText(decodeBodyByHeuristic(htmlMatch[1])));
  }
  const whole = messageBodyAfterHeaders(raw);
  return whole ? htmlToCleanText(decodeBodyByHeuristic(whole)) : '';
}

// Sends one Bark push (GET <BARK_PUSH_URL>/<title>/<body>?<query>).
// Failures are logged and not rethrown, so one failed push never prevents the
// next one. The URL is not logged because it contains the device key.
async function sendBarkPush(title, body, query) {
  const url = `${BARK_PUSH_URL}/${encodeURIComponent(title)}/${encodeURIComponent(body)}?${query}`;
  try {
    const response = await fetch(url);
    if (!response.ok) {
      console.warn(`Bark push failed with HTTP ${response.status}`);
    }
  } catch (err) {
    console.error('Bark push request failed:', err);
  }
}

export default {
  // Email handler: extracts a verification code from the incoming message and
  // sends two Bark pushes - the code first, then the message text (truncated to
  // 5000 characters). Errors are logged and never propagate to the caller.
  async email(message, env, ctx) {
    try {
      const raw = await new Response(message.raw).text();
      const from = message.headers.get('from') || '(未知发件人)';
      const to = message.headers.get('to') || '(未知收件人)';

      // Candidate bodies (text/plain preferred); fall back to the cleaned whole body.
      let candidates = collectCandidateParts(raw);
      if (!candidates || candidates.length === 0) {
        const wholeBody = messageBodyAfterHeaders(raw);
        candidates = wholeBody ? [htmlToCleanText(decodeBodyByHeuristic(wholeBody))] : [];
      }

      const code = extractCodeFromCandidates(candidates);
      console.log('提取到的验证码:', code || '(未识别)');

      // 1) OTP push: title is the sender, body is the recipient plus the code.
      const bodyOtp = code ? `收件人:${to}\n验证码:${code}` : `收件人:${to}\n未识别验证码`;
      await sendBarkPush(from, bodyOtp, `${BARK_ARG_OTP}${encodeURIComponent(code || '')}`);

      // 2) Original-content push: normalize newlines, keep paragraph breaks, truncate.
      const originalText = extractOriginalText(raw)
        .replace(/\r\n/g, '\n')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
      const rawText = originalText.slice(0, 5000);
      const bodyFull = `收件人:${to}\n邮件内容(截断5000字符):\n${rawText}`;
      await sendBarkPush(from, bodyFull, BARK_ARG_FULL);
    } catch (err) {
      console.error('Email Worker 错误:', err);
    }
  }
};