[sysadmin-bin] inactive-gitlab-users.py: rewrite and add language and spam checks
- From: Bartłomiej Piotrowski <bpiotrowski src gnome org>
- To: gnome-sysadmin gnome org,commits-list gnome org
- Subject: [sysadmin-bin] inactive-gitlab-users.py: rewrite and add language and spam checks
- Date: Mon, 22 Jul 2019 10:15:09 +0000 (UTC)
commit 296a10b80e01676dd9890b8cd88ed2b9ad560133
Author: Bartłomiej Piotrowski <bpiotrowski gnome org>
Date: Mon Jul 22 12:06:31 2019 +0200
inactive-gitlab-users.py: rewrite and add language and spam checks
gitlab/inactive-gitlab-users.py | 265 +++++++++++++++++++++++++++++++++++-----
1 file changed, 235 insertions(+), 30 deletions(-)
---
diff --git a/gitlab/inactive-gitlab-users.py b/gitlab/inactive-gitlab-users.py
old mode 100755
new mode 100644
index 78f5eda..088db38
--- a/gitlab/inactive-gitlab-users.py
+++ b/gitlab/inactive-gitlab-users.py
@@ -1,35 +1,240 @@
#!/usr/bin/python
-import dateutil.relativedelta as relativedelta
-import datetime as dt
+from __future__ import print_function
+
+import argparse
+import datetime
+import json
+import os
+import re
+import sys
+
import gitlab
+import polyglot.detect
+import pytz
+from dateutil.parser import parse as dateparser
+from dateutil.relativedelta import relativedelta
+from gitlab.exceptions import GitlabGetError
+from spam.surbl import SurblChecker
+from spam import DomainInexistentException
+
+
def timestamp2date(timestamp):
    """Return the part of ``timestamp`` before the first "T".

    For an ISO-8601 string such as ``2019-07-22T10:15:09Z`` that is the
    calendar date.  Falsy input (``None`` or an empty string) yields ``None``.
    """
    if not timestamp:
        return None
    date_part, _, _ = timestamp.partition("T")
    return date_part
+
+
def check_if_spam(url):
    """Ask SURBL whether ``url`` is listed as spam.

    ``url`` is prefixed with ``https://`` when it does not already start
    with an http(s) scheme, then handed to ``SurblChecker.is_spam``.

    Returns whatever ``is_spam`` returns, or ``False`` when the lookup
    raises: the check is best-effort, so a failed lookup is treated as
    "not spam" rather than aborting the whole run.
    """
    if not re.match("http[s]?://", url, re.IGNORECASE):
        url = "https://{}".format(url)

    try:
        return SurblChecker().is_spam(url)
    except Exception:
        # Deliberately broad, but not a bare ``except:`` -- Ctrl-C
        # (KeyboardInterrupt) and SystemExit must still stop the script.
        return False
+
+
def get_inactive_users(gl):
    """Collect accounts that look inactive or spammy.

    ``gl`` is an authenticated ``gitlab.Gitlab`` client.  Every user is
    visited in ascending ``created_at`` order.  A user is skipped when:

    * it already carries the ``trusted=true`` custom attribute;
    * it has an ``ldapmain`` identity, an e-mail address in one of the
      trusted domains, or two-factor authentication enabled (such users are
      also marked trusted as a side effect);
    * it was created less than one month ago.

    Remaining users are reported with a ``reason``:

    * ``"inactivity"`` -- never signed in, or last signed in on the day the
      account was created, and no events at all;
    * ``"spam"`` -- bio starts with a URL, or ``website_url`` is set, and
      ``check_if_spam`` flags it;
    * ``"language"`` -- bio is reliably detected (confidence > 95) as one of
      the unwanted languages.

    Returns a list of dicts holding the stringified ``fields`` of each
    reported user plus the ``reason`` key.
    """
    trusted_domains = frozenset([
        "canonical.com",
        "debian.org",
        "endlessm.com",
        "fedoraproject.org",
        "gentoo.org",
        "igalia.com",
        "gnome.org",
        # The comma after "opensuse.org" was missing, which silently fused it
        # with the next literal into "opensuse.orgredhat.com".
        "opensuse.org",
        "redhat.com",
        "suse.com",
        "ubuntu.com",
    ])

    fields = [
        "username",
        "email",
        "id",
        "bio",
        "website_url",
        "created_at",
        "current_sign_in_at",
        "last_activity_on",
    ]

    # We have a problem with spam accounts with descriptions in some languages
    unwanted_langs = frozenset(["id", "es", "fr", "ms", "vi", "pt"])
    url_prefix = re.compile("http[s]?://", re.IGNORECASE)

    # Accounts registered after this moment (one month ago) are never reported.
    cutoff = datetime.datetime.now(pytz.utc) - relativedelta(months=1)

    # Compare by ID: one set lookup per user instead of scanning the whole
    # list of trusted users and relying on API-object equality.
    trusted_ids = {
        trusted.id
        for trusted in gl.users.list(custom_attributes={"trusted": "true"}, all=True)
    }
    users = gl.users.list(as_list=False, order_by="created_at", sort="asc")
    results = []

    for user in users:
        attrs = user.attributes

        if attrs["id"] in trusted_ids:
            continue

        identities = [identity["provider"] for identity in attrs["identities"]]
        if "ldapmain" in identities:
            user.customattributes.set("trusted", "true")
            # A user that was just marked trusted must not be reported; the
            # other trust rules below skip the same way.
            continue

        # rpartition copes with an address lacking "@" (no IndexError).
        _, at_sign, domain = (attrs["email"] or "").rpartition("@")
        if at_sign and domain.lower() in trusted_domains:
            user.customattributes.set("trusted", "true")
            continue

        if attrs["two_factor_enabled"]:
            user.customattributes.set("trusted", "true")
            continue

        # Skip user if registered this month
        if dateparser(attrs["created_at"]) > cutoff:
            continue

        userdata = {field: str(attrs[field]) for field in fields}

        created_at = timestamp2date(attrs["created_at"])
        current_sign_in_at = timestamp2date(attrs["current_sign_in_at"])

        # If user logged in only once or never, check if they made any action.
        if (not current_sign_in_at) or (created_at == current_sign_in_at):
            if not user.events.list(all=True):
                userdata["reason"] = "inactivity"
                results.append(userdata)
                continue

        bio = attrs["bio"]
        if bio:
            # Some users set URL in bio, check it against surbl
            if url_prefix.match(bio) and check_if_spam(bio):
                userdata["reason"] = "spam"
                results.append(userdata)
                continue

            try:
                detector = polyglot.detect.Detector(bio)
                lang = detector.language
                unwanted = (
                    detector.reliable
                    and lang.code in unwanted_langs
                    and lang.confidence > 95
                )
            except polyglot.detect.base.UnknownLanguage:
                # Undetectable language: nothing to hold against the user.
                unwanted = False

            if unwanted:
                userdata["reason"] = "language"
                results.append(userdata)
                continue

        website_url = attrs["website_url"]
        if website_url and check_if_spam(website_url):
            userdata["reason"] = "spam"
            results.append(userdata)
            continue

    return results
+
+
def trust_user(gl, user_id):
    """Set the custom attribute ``trusted`` to ``"true"`` on ``user_id``.

    The user object is requested with ``lazy=True``.  The ID is printed to
    stdout once the attribute has been set.
    """
    gl.users.get(user_id, lazy=True).customattributes.set("trusted", "true")
    print(user_id)
+
+
def untrust_user(gl, user_id):
    """Remove the custom attribute ``trusted`` from ``user_id``.

    The user object is requested with ``lazy=True``.  The ID is printed to
    stdout once the attribute has been deleted.
    """
    gl.users.get(user_id, lazy=True).customattributes.delete("trusted")
    print(user_id)
+
+
def delete_user(gl, user_id):
    """Delete the user ``user_id`` and print the ID to stdout on success.

    A ``GitlabDeleteError`` does not abort the run (callers delete users in
    batches), but it is reported on stderr instead of being dropped
    silently.  stdout therefore still lists only the IDs that were actually
    deleted.
    """
    try:
        gl.users.delete(user_id)
    except gitlab.exceptions.GitlabDeleteError as err:
        print("could not delete user {}: {}".format(user_id, err), file=sys.stderr)
    else:
        print(user_id)
+
+
def trust_all_groups(gl):
    """Mark every member of a public top-level group as trusted.

    Lists all public groups, keeps those without a ``parent_id``, gathers
    their members via ``group.members.all`` and calls ``trust_user`` once
    per distinct user.
    """
    member_ids = set()

    for group in gl.groups.list(all=True, visibility="public"):
        if group.attributes["parent_id"]:
            # Subgroup: only top-level groups are considered.
            continue
        # Deduplicate on the numeric user ID rather than on the API objects,
        # so a user in several groups is trusted once regardless of how the
        # library hashes or compares its objects.
        member_ids.update(member.id for member in group.members.all(all=True))

    # Sorted for a reproducible processing/printing order.
    for user_id in sorted(member_ids):
        trust_user(gl, user_id)
+
+
def trust_2fa_users(gl):
    """Mark every user listed with ``two_factor='enabled'`` as trusted.

    The listing is requested with ``as_list=False`` and consumed one user at
    a time; each user's ID is passed to ``trust_user``.
    """
    two_factor_users = gl.users.list(all=True, two_factor='enabled', as_list=False)
    for user_id in (account.id for account in two_factor_users):
        trust_user(gl, user_id)
+
+
def _read_gitlab_token():
    """Return the API token from $GITLAB_TOKEN or the admin secret file.

    The secret file's first line is expected to look like ``NAME=token``.
    """
    token = os.getenv("GITLAB_TOKEN")
    if token is not None:
        return token

    with open("/home/admin/secret/gitlab_rw") as f:
        line = f.readline()
    # Split on the first "=" only, so a value containing "=" is not cut
    # short.  Whitespace and quotes are stripped as well.
    # NOTE(review): quote stripping assumes the file may still use the older
    # Python-assignment layout -- confirm.
    return line.split("=", 1)[1].strip().strip("'\"")


def _build_parser():
    """Build the command-line parser with one sub-command per action."""
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="command")

    subparsers.add_parser("get-inactive", help="get inactive users")
    subparsers.add_parser(
        "trust-groups", help="mark users which are member of any group as trusted"
    )
    subparsers.add_parser(
        "trust-2fa", help="mark users which have two factor authenticated enabled as trusted"
    )

    trust = subparsers.add_parser("trust", help="mark users as trusted")
    trust.add_argument("user_id", nargs="+", help="user IDs to mark as trusted")

    untrust = subparsers.add_parser("untrust", help="mark users as untrusted")
    untrust.add_argument("user_id", nargs="+", help="user IDs to mark as untrusted")

    delete = subparsers.add_parser("delete", help="delete users")
    delete.add_argument("user_id", nargs="+", help="user IDs to delete")

    delete_from_file = subparsers.add_parser(
        "delete-from-json",
        help="delete users from json file generated with get-inactive",
    )
    delete_from_file.add_argument("filename", help="path to json file")

    return parser


def main():
    """Parse the command line and run the selected sub-command."""
    parser = _build_parser()
    # Parse first: --help, usage errors and a missing sub-command must not
    # require a token or a round-trip to the GitLab server.
    args = parser.parse_args()
    if args.command is None:
        parser.print_help()
        return

    gl = gitlab.Gitlab(
        "https://gitlab.gnome.org", private_token=_read_gitlab_token(), per_page=100
    )
    gl.auth()

    if args.command == "get-inactive":
        inactive_users = get_inactive_users(gl)
        print(json.dumps(inactive_users, indent=4, separators=(",", ": ")))
    elif args.command == "trust":
        for user_id in args.user_id:
            trust_user(gl, user_id)
    elif args.command == "untrust":
        for user_id in args.user_id:
            untrust_user(gl, user_id)
    elif args.command == "delete":
        for user_id in args.user_id:
            delete_user(gl, user_id)
    elif args.command == "delete-from-json":
        with open(args.filename, "r") as f:
            users = json.load(f)

        for user in users:
            delete_user(gl, user["id"])
    elif args.command == "trust-groups":
        trust_all_groups(gl)
    elif args.command == "trust-2fa":
        trust_2fa_users(gl)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]