[已优化]自建nodeseek个性化推送python脚本

2024年4月28日 3条评论 17次阅读 0人点赞 Tao_Qi

和官方的区别：

1、可屏蔽特定用户发的贴子
2、可屏蔽包含屏蔽词的标题或内容的帖子（比如大量的节日祝福的水贴）
3、更新速度不如官方版本

代码类型：

python

运行准备

python3
已安装feedparser和requests
tg机器人一个

须知

代码可能存在未知bug，如若发生，请自行更正。
本爬虫遵守本站Robots协议。
搭配浏览器屏蔽脚本更佳：java大佬开发的

原理

对论坛RSS爬取解析。

更新

1、已处理tg预留关键词冲突导致推送失败的问题。
2、已优化异常处理。

代码：

第一种（推荐）

额外运行准备：在py文件所在文件夹中创建一个名为“key.txt”的文件，文件内只输入数字0，保存即可
该种方案，通过调用Debian系统开机自启动（Windows同理）实现推送。

import feedparser
import requests
import time

def noraml(text):
    """Escape *text* so it can be embedded in a Telegram MarkdownV2 message.

    Every character that MarkdownV2 treats as markup is prefixed with a
    backslash, so a post title containing one of them no longer makes the
    ``sendMessage`` call fail.

    Args:
        text: Raw text, e.g. a post title.

    Returns:
        The escaped text. Spaces are escaped too, exactly as before.
    """
    # "+" and the backslash itself are reserved as well but used to be left
    # untouched. A single translate() pass also guarantees that a backslash
    # inserted here is never escaped a second time.
    reserved = "\\_*[]()~`>#+-=|{}.! "
    return text.translate({ord(ch): "\\" + ch for ch in reserved})
def block_title(text):
    """Tell whether a title contains a blocked keyword.

    The keywords are read from the module-level ``blockcontent`` list at
    call time.

    Args:
        text: Title to inspect.

    Returns:
        True if at least one keyword occurs in *text*, otherwise False.
    """
    return any(keyword in text for keyword in blockcontent)
def block_summary(text):
    """Tell whether a post summary contains a blocked keyword.

    Args:
        text: Summary text to inspect.

    Returns:
        True as soon as one entry of the module-level ``blockcontent`` list
        is found in *text*; False when none is.
    """
    matches = (keyword in text for keyword in blockcontent)
    # next() stops at the first hit, like the early return of a plain loop.
    return next((True for hit in matches if hit), False)
def getit(chat_id,token):
    """Scan the NodeSeek RSS feed once and push unseen posts to Telegram.

    Every entry whose id is greater than the module-level ``maxid`` is sent
    to *chat_id*, unless its author is in ``blocklist`` or its title or
    summary contains a keyword from ``blockcontent``. Afterwards ``maxid``
    is set to the newest id in the feed and written to ``key.txt`` so the
    next run of the script resumes from there.

    Args:
        chat_id: Telegram chat id that receives the messages.
        token: Telegram bot API token.

    Exceptions raised by the feed parser, by ``requests`` or while writing
    ``key.txt`` are not handled here and propagate to the caller.
    """
    global maxid
    print(f"扫描一次,{maxid}")
    feed = feedparser.parse("https://rss.nodeseek.com/")
    posts = []
    for entry in feed["entries"]:
        posts.append({
            "id": int(entry["id"]),
            "author": entry["author"],
            "title": entry["title"],
            "link": entry["link"],
            # Not every entry carries a summary.
            "summary": entry["summary"] if "summary" in entry else None,
        })
    if not posts:
        # A feed without entries gives nothing to push and no id to record;
        # indexing the first post below would raise IndexError.
        return
    posts.sort(key=lambda post: post["id"], reverse=True)
    newest_id = posts[0]["id"]
    if newest_id > maxid:
        api_url = f'https://api.telegram.org/bot{token}/sendMessage'
        for post in posts:
            if post["id"] <= maxid:
                # Posts are sorted newest first, so all remaining ones have
                # already been handled.
                print("over")
                break
            if post["author"] in blocklist:
                continue
            # Keywords are matched against the raw title: after escaping,
            # any keyword containing a space or punctuation could not match.
            if block_title(post["title"]):
                continue
            if post["summary"] is not None and block_summary(post["summary"]):
                continue
            title = noraml(post["title"])
            # Inside the (...) part of an inline link only "\" and ")" need
            # escaping.
            link = post["link"].replace("\\", "\\\\").replace(")", "\\)")
            print(post)
            # The timeout keeps a stalled connection from hanging the run.
            r = requests.post(
                api_url,
                json={"chat_id": chat_id, "text": f"*[{title}]({link})*", "parse_mode": "MarkdownV2"},
                timeout=10,
            )
            print(r.text)
    maxid = newest_id
    # The mode used to be "w " (trailing space), which open() rejects with
    # ValueError, so the id was never saved. "路径" is the placeholder for
    # the folder holding key.txt and must be replaced by the user.
    with open("路径\\key.txt", "w") as key:
        key.write(f'{maxid}')
# --- Settings: every commented value below must be adjusted -----------------
chat_id="1234567" # personal Telegram chat id
token="asasasa" # Telegram bot API token
blocklist=["bbb","aaa"] # forum nicknames whose posts are never pushed
blockcontent=["新年快乐","五一快乐","出鸡"]   # keywords that block a post

if __name__ == "__main__":
    # Resume from the id saved by the previous run. "路径" is the placeholder
    # for the folder that holds key.txt. A missing file is deliberately fatal:
    # falling back to 0 would re-push the whole feed on every run.
    with open("路径\\key.txt","r") as key:
        maxid=int(key.read())
    try:
        getit(chat_id,token)
    except Exception as exc:
        # Retry once. Unlike a bare "except:", this lets Ctrl+C and
        # SystemExit stop the script instead of triggering the retry.
        print(f"扫描失败，重试一次: {exc!r}")
        getit(chat_id,token)

编写一个sh文件

# Run the pusher over and over, pausing between passes.
while :; do
    # Adjust the directory and the script name to your setup.
    if cd /py; then
        python3 2.py
    fi
    # Polling interval in seconds; at least 5 is recommended.
    sleep 10
done

加入linux开机自启动项目

请先参考该网址，开启开机自启动功能

vim /etc/rc.local
在exit 0前加入：bash /path/to/your_script.sh & #请修改文件路径和sh文件名称
赋予权限：chmod +x /path/to/your_script.sh #请修改文件路径和sh文件名称

第二种

不推荐，该方法用自循环，存在ns故障时异常退出的可能性。

import feedparser
import requests
import time
def noraml(text):
    """Backslash-escape the characters Telegram MarkdownV2 reserves.

    Args:
        text: Raw text, e.g. a post title.

    Returns:
        *text* with every reserved character preceded by a backslash.
        Spaces are escaped as well, as they were before.
    """
    # "+" and "\" are reserved too but were previously passed through, which
    # made sendMessage reject titles containing them. Building the result
    # character by character means an inserted backslash is never re-escaped.
    reserved = frozenset("\\_*[]()~`>#+-=|{}.! ")
    return "".join("\\" + ch if ch in reserved else ch for ch in text)
def block_title(text):
    """Check a title against the module-level ``blockcontent`` keywords.

    Args:
        text: Title to inspect.

    Returns:
        True if any keyword is a substring of *text*, False otherwise.
    """
    for keyword in blockcontent:
        if keyword in text:
            return True
    return False
def block_summary(text):
    """Check a post summary against the module-level ``blockcontent`` keywords.

    Args:
        text: Summary text to inspect.

    Returns:
        True if any keyword is a substring of *text*, False otherwise.
    """
    return any(keyword in text for keyword in blockcontent)
def getit(chat_id,token):
    """Scan the NodeSeek RSS feed once and push unseen posts to Telegram.

    Every entry whose id is greater than the module-level ``maxid`` is sent
    to *chat_id*, unless its author is in ``blocklist`` or its title or
    summary contains a keyword from ``blockcontent``. When the feed holds
    something newer, ``maxid`` is advanced to the newest id afterwards.

    Args:
        chat_id: Telegram chat id that receives the messages.
        token: Telegram bot API token.

    Exceptions raised by the feed parser or by ``requests`` are not handled
    here and propagate to the caller.
    """
    global maxid
    print(f"扫描一次,{maxid}")
    feed = feedparser.parse("https://rss.nodeseek.com/")
    posts = [
        {
            "id": int(entry["id"]),
            "author": entry["author"],
            "title": entry["title"],
            "link": entry["link"],
            # Not every entry carries a summary.
            "summary": entry["summary"] if "summary" in entry else None,
        }
        for entry in feed["entries"]
    ]
    if not posts:
        # A feed without entries gives nothing to push; indexing the first
        # post below would raise IndexError.
        return
    posts.sort(key=lambda post: post["id"], reverse=True)
    newest_id = posts[0]["id"]
    if newest_id <= maxid:
        return
    api_url = f'https://api.telegram.org/bot{token}/sendMessage'
    for post in posts:
        if post["id"] <= maxid:
            # Posts are sorted newest first, so all remaining ones have
            # already been handled.
            print("over")
            break
        # Keywords are matched against the raw title: after escaping, any
        # keyword containing a space or punctuation could not match.
        blocked = (
            post["author"] in blocklist
            or block_title(post["title"])
            or (post["summary"] is not None and block_summary(post["summary"]))
        )
        if blocked:
            continue
        title = noraml(post["title"])
        # Inside the (...) part of an inline link only "\" and ")" need
        # escaping.
        link = post["link"].replace("\\", "\\\\").replace(")", "\\)")
        print(post)
        # The timeout keeps a stalled connection from blocking the scan.
        r = requests.post(
            api_url,
            json={"chat_id": chat_id, "text": f"*[{title}]({link})*", "parse_mode": "MarkdownV2"},
            timeout=10,
        )
        print(r.text)
    maxid = newest_id
# --- Settings: every commented value below must be adjusted -----------------
maxid=0 # highest post id handled so far; 0 means the first scan pushes everything
chat_id="1234567" # personal Telegram chat id
token="asasasa" # Telegram bot API token
blocklist=["bbb","aaa"] # forum nicknames whose posts are never pushed
blockcontent=["新年快乐","五一快乐","出鸡"]   # keywords that block a post

if __name__ == "__main__":
    # This variant polls in-process. Previously getit() ran once (twice on
    # error) and the script exited, so nothing was pushed after the first scan.
    while True:
        try:
            getit(chat_id,token)
        except Exception as exc:
            # A failed scan must not end the loop; the next pass retries.
            # Ctrl+C still stops the script because only Exception is caught.
            print(f"扫描失败，稍后重试: {exc!r}")
        time.sleep(5) # refresh interval in seconds; choose a reasonable value

代码中需自行修改的地方

chat_id（个人tg id）
token（tg机器人api token）
blocklist（屏蔽用户昵称）
blockcontent（屏蔽关键词）
time.sleep(5) （刷新时间，请合理填写，单位：秒）

所有存在批注的地方都应该更改。

本作品采用知识共享署名-相同方式共享 4.0 国际许可协议进行许可

master319说道：

2024年4月28日下午11:34

大佬好帖，前排支持

回复
在7楼说道：

2024年4月28日下午11:34

可以，好帖BD

回复
nanyi说道：

2024年4月28日下午11:51

好牛逼，我太需要了

回复

Tao_Qi