That's a good bot

This commit is contained in:
Hugo Levy-Falk 2021-03-07 18:11:31 +01:00
parent 0bfb0badd2
commit 69ef72a039
7 changed files with 662 additions and 1 deletions

5
Dockerfile Normal file
View file

@ -0,0 +1,5 @@
# Container image for the TelegramMarkov bot.
FROM julia:latest
WORKDIR /code
# Bring the package sources into the image: WORKDIR alone only creates an empty directory.
COPY . /code
# Register the local package by path. A bare positional string is read by Pkg as a
# package *name*, so "/code" has to be given through the `path` keyword.
RUN julia -e 'import Pkg; Pkg.develop(path="/code")'
# Exec form: julia is started directly (no intermediate shell) and receives stop signals.
ENTRYPOINT ["julia", "-e", "using TelegramMarkov; run_server()"]

View file

@ -1,2 +1,238 @@
# This file is machine-generated - editing it directly is not advised # This file is machine-generated - editing it directly is not advised
[[Artifacts]]
deps = ["Pkg"]
git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744"
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
version = "1.3.0"
[[AssetRegistry]]
deps = ["Distributed", "JSON", "Pidfile", "SHA", "Test"]
git-tree-sha1 = "b25e88db7944f98789130d7b503276bc34bc098e"
uuid = "bf4720bc-e11a-5d0c-854e-bdca1663c893"
version = "0.1.0"
[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
[[Compat]]
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
git-tree-sha1 = "919c7f3151e79ff196add81d7f4e45d91bbf420b"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "3.25.0"
[[DataAPI]]
git-tree-sha1 = "8ab70b4de35bb3b8cc19654f6b893cf5164f8ee8"
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
version = "1.5.1"
[[DataStructures]]
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.18.9"
[[Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
[[DelimitedFiles]]
deps = ["Mmap"]
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
[[Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
[[FileWatching]]
uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
[[HTTP]]
deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets", "URIs"]
git-tree-sha1 = "63055ee44b5c2b95ec1921edcf856c60124ff0c3"
uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3"
version = "0.9.2"
[[Hiccup]]
deps = ["MacroTools", "Test"]
git-tree-sha1 = "6187bb2d5fcbb2007c39e7ac53308b0d371124bd"
uuid = "9fb69e20-1954-56bb-a84f-559cc56a8ff7"
version = "0.2.2"
[[IniFile]]
deps = ["Test"]
git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8"
uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f"
version = "0.5.0"
[[InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
[[JLLWrappers]]
git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0"
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
version = "1.2.0"
[[JSON]]
deps = ["Dates", "Mmap", "Parsers", "Unicode"]
git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4"
uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
version = "0.21.1"
[[JSON3]]
deps = ["Dates", "Mmap", "Parsers", "StructTypes", "UUIDs"]
git-tree-sha1 = "961ef1c3e5c8a595d5bec270a9007429ef12ed10"
uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
version = "1.5.1"
[[LibGit2]]
deps = ["Printf"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[[LinearAlgebra]]
deps = ["Libdl"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[[Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
[[MacroTools]]
deps = ["Markdown", "Random"]
git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0"
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
version = "0.5.6"
[[Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
[[MbedTLS]]
deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"]
git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe"
uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
version = "1.0.3"
[[MbedTLS_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "0eef589dd1c26a3ac9d753fe1a8bcad63f956fa6"
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
version = "2.16.8+1"
[[Missings]]
deps = ["DataAPI"]
git-tree-sha1 = "f8c673ccc215eb50fcadb285f522420e29e69e1c"
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
version = "0.4.5"
[[Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
[[Mux]]
deps = ["AssetRegistry", "Base64", "HTTP", "Hiccup", "Pkg", "Sockets", "WebSockets"]
git-tree-sha1 = "2578b3cd03e4f568f213c7d51b2118f9e81c2617"
uuid = "a975b10e-0019-58db-a62f-e48ff68538c9"
version = "0.7.5"
[[OrderedCollections]]
git-tree-sha1 = "d45739abcfc03b51f6a42712894a593f74c80a23"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.3.3"
[[Parsers]]
deps = ["Dates"]
git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714"
uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
version = "1.0.15"
[[Pidfile]]
deps = ["FileWatching", "Test"]
git-tree-sha1 = "1be8660b2064893cd2dae4bd004b589278e4440d"
uuid = "fa939f87-e72e-5be4-a000-7fc836dbe307"
version = "1.2.0"
[[Pkg]]
deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
[[Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[[REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
[[Random]]
deps = ["Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[[SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
[[SharedArrays]]
deps = ["Distributed", "Mmap", "Random", "Serialization"]
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
[[Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
[[SortingAlgorithms]]
deps = ["DataStructures", "Random", "Test"]
git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd"
uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
version = "0.3.1"
[[SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[[Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[[StatsBase]]
deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"]
git-tree-sha1 = "7bab7d4eb46b225b35179632852b595a3162cb61"
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
version = "0.33.2"
[[StructTypes]]
deps = ["Dates", "UUIDs"]
git-tree-sha1 = "65a43f5218197bc7091b76bc273a5e323a1d7b0d"
uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4"
version = "1.2.3"
[[Telegram]]
deps = ["HTTP", "JSON3"]
git-tree-sha1 = "cc4a954afdbee781570ff9ab5d59251affb2ae4a"
uuid = "1da6f4ae-116c-4c38-8ee9-19974ff3601d"
version = "0.2.2"
[[Test]]
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[[URIs]]
git-tree-sha1 = "7855809b88d7b16e9b029afd17880930626f54a2"
uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
version = "1.2.0"
[[UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[[WebSockets]]
deps = ["Base64", "Dates", "HTTP", "Logging", "Sockets"]
git-tree-sha1 = "f91a602e25fe6b89afc93cf02a4ae18ee9384ce3"
uuid = "104b5d7c-a370-577a-8038-80a2059c5097"
version = "1.5.9"

View file

@ -3,6 +3,15 @@ uuid = "3cc71c97-ced3-4d90-8f8e-c7b19a4280af"
authors = ["Hugo Levy-Falk"] authors = ["Hugo Levy-Falk"]
version = "0.1.0" version = "0.1.0"
[deps]
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
Mux = "a975b10e-0019-58db-a62f-e48ff68538c9"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Telegram = "1da6f4ae-116c-4c38-8ee9-19974ff3601d"
[compat] [compat]
julia = "1" julia = "1"

View file

@ -1,5 +1,49 @@
module TelegramMarkov module TelegramMarkov
# Write your package code here. function initialize end
function make_sentence end
function list_usernames end
"""
    fetch_in_env(varname)

Return the value of the environment variable `varname`.

When the variable is not set, an error is logged (no exception is thrown) and the
empty string is returned instead.
"""
function fetch_in_env(varname)
    # Happy path first: the variable exists, hand its value back.
    haskey(ENV, varname) && return ENV[varname]
    error_string = """
    Please, set $varname in your environment. You can either set it in Julia by doing `ENV["$varname"]=something` or in your shell `export $varname=something`.
    """
    @error error_string
    return ""
end
include("bot.jl")
include("markov.jl")
include("server.jl")
import .Markov: initialize, make_sentence, list_usernames
"""
    run_server()

Start the bot: build the Markov tables, assemble the application and hand every
incoming Telegram update to it. A non-`nothing` result returned by the application
is sent back as a reply to the message that triggered it.
"""
function run_server()
    @info "Initializing bot"
    Markov.initialize()
    application = Server.application()
    @info "Starting Telegram bot now"
    Bot.run_bot(Bot.TELEGRAM_BOT) do update
        @debug "Got a message" message=update
        # Not every update carries a `:message` entry (the Bot API also delivers
        # edited messages, channel posts, ...); skip those instead of failing on
        # the lookups below.
        if !(:message in keys(update))
            return nothing
        end
        message = update[:message]
        request = Dict(:message => message, :params => Dict())
        ret = application(request)
        @debug "I got this ret" ret
        if !isnothing(ret)
            Bot.sendMessage(
                Bot.TELEGRAM_BOT,
                chat_id=message[:chat][:id],
                text=ret,
                reply_to_message_id=message[:message_id],
            )
        end
    end
end
export run_server
end end

26
src/bot.jl Normal file
View file

@ -0,0 +1,26 @@
module Bot
export find_username
using Logging
using Telegram, Telegram.API
import ..fetch_in_env
# Bot token, read from the TG_TOKEN_MARKOV environment variable when the module is loaded.
TG_TOKEN = fetch_in_env("TG_TOKEN_MARKOV")
# Identifier of the chat, parsed as an integer from the TG_CHAT_ID environment variable.
# NOTE(review): `fetch_in_env` returns "" for an unset variable, in which case this
# `parse` throws while the module is being loaded.
CHAT_ID = parse(Int, fetch_in_env("TG_CHAT_ID"))
# Telegram client built from the token; it is passed explicitly to every API call below.
TELEGRAM_BOT = TelegramClient(TG_TOKEN, use_globally=false)
"""
    find_username(user_id)

Query `getChatMember` for `user_id` in the chat `CHAT_ID` and return the `:username`
entry found under `:user` in the answer.
"""
function find_username(user_id)
    @debug "Fetching user" user_id
    member = getChatMember(TELEGRAM_BOT, user_id=user_id, chat_id=CHAT_ID)
    return member[:user][:username]
end
"""
    register_command(commands...)

Publish `commands` as the bot's command list through `setMyCommands`.

NOTE(review): each element presumably has to follow the Bot API `BotCommand` layout
(`command` and `description` entries) — confirm against the callers.
"""
function register_command(commands...)
    # Telegram.jl API methods receive their parameters as keyword arguments, so the
    # list has to be passed as `commands=...` rather than as a second positional.
    return setMyCommands(TELEGRAM_BOT; commands=collect(commands))
end
end

223
src/markov.jl Normal file
View file

@ -0,0 +1,223 @@
module Markov
import JSON
using StatsBase
using Logging
import HTTP
using ..Bot
import ..initialize, ..make_sentence, .. list_usernames, ..fetch_in_env
# Path of the JSON file to analyse, read from the INPUT_MARKOV environment variable.
JSON_FILE = fetch_in_env("INPUT_MARKOV")
# A word, or `nothing` where there is no word (used around the start/end of a line).
Token = Union{String, Nothing}
# username => user id
USERIDS = Dict{String, Int64}()
# user id => current word => following word => number of occurrences
# (spelled ANALYSED, unlike the two ANALYZED tables below)
ANALYSED_SINGLE = Dict{Int64, Dict{Token, Dict{Token, Float64}}}()
# user id => (previous word, current word) => following word => number of occurrences
ANALYZED_FORWARD = Dict{Int64, Dict{Tuple{Token, Token}, Dict{Token, Float64}}}()
# user id => (current word, next word) => preceding word => number of occurrences
ANALYZED_BACKWARD = Dict{Int64, Dict{Tuple{Token, Token}, Dict{Token, Float64}}}()
# Set to true by `initialize` once the tables above have been filled.
INITIALIZED = false
"""
    reset_module()

Rebind every piece of module state to a fresh, empty value and mark the module as
not initialized.
"""
function reset_module()
    global INITIALIZED, USERIDS, ANALYSED_SINGLE, ANALYZED_FORWARD, ANALYZED_BACKWARD
    # Shared shape of the two word-pair tables.
    PairTable = Dict{Tuple{Token, Token}, Dict{Token, Float64}}
    USERIDS = Dict{String, Int64}()
    ANALYSED_SINGLE = Dict{Int64, Dict{Token, Dict{Token, Float64}}}()
    ANALYZED_FORWARD = Dict{Int64, PairTable}()
    ANALYZED_BACKWARD = Dict{Int64, PairTable}()
    INITIALIZED = false
end
"""
    list_usernames()

Return a vector holding every username stored in `USERIDS`.
"""
list_usernames() = collect(keys(USERIDS))
"""
    list_users()

Return a vector holding every user id stored in `USERIDS`.
"""
list_users() = collect(values(USERIDS))
"""
    register_user(username, user_id)

Record `user_id` under `username` in `USERIDS` and return `user_id`.
"""
register_user(username, user_id) = (USERIDS[username] = user_id)
"""
    register_user(user_id; default="plop")

Look up the username of `user_id` with `find_username` and record it in `USERIDS`.

If the lookup fails with an HTTP status error, or the answer contains no username,
`default` is recorded instead (`"deleted"` when `default` is `nothing`). Any other
error is rethrown.
"""
function register_user(user_id; default="plop")
    username = try
        find_username(user_id)
    catch e
        # StatusError: the lookup request itself was rejected.
        # KeyError: the answer has no `username` entry (it is optional in the Bot API).
        if !(e isa HTTP.ExceptionRequest.StatusError || e isa KeyError)
            rethrow()  # keeps the original backtrace, unlike `throw(e)`
        end
        if isnothing(default)
            default = "deleted"
        end
        @debug "Could not find username for id $user_id , falling back to default : $default"
        default
    end
    return register_user(username, user_id)
end
"""
    analyse_line(words)

Walk through `words` and list the transitions observed in that line.

Returns the tuple `(backward, forward, single)` where
* `backward` holds `(current, next) => previous` pairs,
* `forward` holds `(previous, current) => next` pairs,
* `single` holds `current => next` pairs.

`nothing` stands for "no word" before the first and after the last word.
"""
function analyse_line(words)
    before, here = nothing, nothing
    forward = Any[]
    backward = Pair{Tuple{Token, Token}, Token}[(nothing, nothing) => nothing]
    single = Any[]
    for word in words
        push!(forward, (before, here) => word)
        push!(backward, (here, word) => before)
        push!(single, here => word)
        before, here = here, word
    end
    # Close the line: nothing follows the last word.
    push!(forward, (before, here) => nothing, (here, nothing) => nothing)
    push!(backward, (here, nothing) => before)
    push!(single, here => nothing)
    return backward, forward, single
end
# Add one occurrence to `table[context][token]` for every `context => token` pair.
function _count_transitions!(table, transitions)
    for transition in transitions
        context, token = first(transition), last(transition)
        if !haskey(table, context)
            table[context] = Dict{Token, Float64}()
        end
        table[context][token] = get(table[context], token, 0) + 1
    end
    return table
end

"""
    analyse_all_lines(lines)

Split every element of `lines` into words, run `analyse_line` on it and count how
often each transition occurs.

Returns the tuple `(backward, forward, single)` of count tables, each mapping a
context to a `Dict` from token to number of occurrences (stored as `Float64`).
The counts are not normalised.
"""
function analyse_all_lines(lines)
    probabilities_forward = Dict{Tuple{Token, Token}, Dict{Token, Float64}}()
    probabilities_backward = Dict{Tuple{Token, Token}, Dict{Token, Float64}}()
    probabilities_single = Dict{Token, Dict{Token, Float64}}()
    for line in split.(lines)
        analysed_backward, analysed_forward, analysed_single = analyse_line(line)
        _count_transitions!(probabilities_forward, analysed_forward)
        _count_transitions!(probabilities_backward, analysed_backward)
        _count_transitions!(probabilities_single, analysed_single)
    end
    return probabilities_backward, probabilities_forward, probabilities_single
end
"""
    initialize(input_file=JSON_FILE; reset=false)

Read the `"messages"` array of the JSON file `input_file`, register the author of
every non-empty text message and fill the three analysis tables for each registered
user.

Does nothing when the module is already initialized, unless `reset=true`, in which
case `reset_module` is called first. Returns `true` after a successful run and
`nothing` when skipped.
"""
function initialize(input_file=JSON_FILE; reset=false)
    if reset
        reset_module()
    end
    global INITIALIZED
    if INITIALIZED
        return
    end
    messages = JSON.parsefile(input_file)["messages"]
    user_lines = Dict{Int64, Vector{String}}()
    for message in messages
        # Entries without an author are skipped.
        haskey(message, "from_id") || continue
        user_id = message["from_id"]
        name = message["from"]
        text = message["text"]
        if text isa String && !isempty(text)
            if !is_registered(user_id)
                register_user(user_id; default=name)
            end
            # Append in place; rebuilding the vector with `vcat` for every message
            # is quadratic in the number of messages of a user.
            push!(get!(() -> String[], user_lines, user_id), text)
        end
    end
    # Only users that actually have lines can be analysed.
    users = [user for user in list_users() if haskey(user_lines, user)]
    # Each task writes to its own slot; the shared Dicts are filled afterwards from a
    # single task, because concurrent insertion into a Dict is a data race.
    results = Vector{Any}(undef, length(users))
    Threads.@threads for i in eachindex(users)
        results[i] = analyse_all_lines(user_lines[users[i]])
    end
    for (user, (backward, forward, single)) in zip(users, results)
        ANALYZED_BACKWARD[user] = backward
        ANALYZED_FORWARD[user] = forward
        ANALYSED_SINGLE[user] = single
    end
    INITIALIZED = true
end
"""
    choose_next(user, current)

Sample a word following `current` from the `ANALYSED_SINGLE` table of `user`, using
the recorded counts as sampling weights.
"""
function choose_next(user, current)
    followers = ANALYSED_SINGLE[user][current]
    candidates = collect(keys(followers))
    counts = collect(values(followers))
    return sample(candidates, weights(counts))
end
"""
    choose_next(user, previous, current)

Sample a word following the pair `(previous, current)` from the `ANALYZED_FORWARD`
table of `user`, using the recorded counts as sampling weights.
"""
function choose_next(user, previous, current)
    followers = ANALYZED_FORWARD[user][(previous, current)]
    candidates = collect(keys(followers))
    counts = collect(values(followers))
    return sample(candidates, weights(counts))
end
"""
    choose_prev(user, current, next)

Sample a word preceding the pair `(current, next)` from the `ANALYZED_BACKWARD`
table of `user`, using the recorded counts as sampling weights.
"""
function choose_prev(user, current, next)
    predecessors = ANALYZED_BACKWARD[user][(current, next)]
    candidates = collect(keys(predecessors))
    counts = collect(values(predecessors))
    return sample(candidates, weights(counts))
end
"""
    make_sentence_forward(user, word1, word2)

Generate the words that come after the pair `(word1, word2)` by repeatedly calling
`choose_next` until it yields `nothing`, and return them joined with single spaces.
"""
function make_sentence_forward(user, word1, word2)
    words = []
    before, word = word2, choose_next(user, word1, word2)
    while word !== nothing
        push!(words, word)
        before, word = word, choose_next(user, before, word)
    end
    # Only non-`nothing` words were collected, so no filtering is needed here.
    return join(words, " ")
end
"""
    make_sentence_backward(user, word1, word2)

Generate the words that come before the pair `(word1, word2)` by repeatedly calling
`choose_prev` until it yields `nothing`, and return them, in reading order, joined
with single spaces.
"""
function make_sentence_backward(user, word1, word2)
    words = []
    after, word = word1, choose_prev(user, word1, word2)
    while word !== nothing
        pushfirst!(words, word)
        word, after = choose_prev(user, word, after), word
    end
    # Only non-`nothing` words were collected, so no filtering is needed here.
    return join(words, " ")
end
"""
    make_sentence(user=nothing, word1=nothing, word2=nothing)

Build a sentence for `user` (picked at random when `nothing`) around `word1` and
`word2`. When only `word1` is given, `word2` is sampled with `choose_next`.

Returns the username in angle brackets, a colon, and the generated words, separated
by single spaces. Returns `"No luck, sorry."` when a lookup raises a `KeyError`
(unknown user or unseen word); other errors are rethrown.
"""
function make_sentence(user::Union{Nothing, String}=nothing, word1::Token=nothing, word2::Token=nothing)
    try
        username, userid = find_user(user)
        if !isnothing(word1) && isnothing(word2)
            word2 = choose_next(userid, word1)
        end
        @debug "chose start" word1 word2
        start = make_sentence_backward(userid, word1, word2)
        @debug "start done" start
        finish = make_sentence_forward(userid, word1, word2)
        @debug "finish done" finish
        fragments = ["<$username>", ":", start, word1, word2, finish]
        # Drop missing words and empty halves alike, otherwise the join leaves
        # doubled or trailing blanks in the reply.
        return join(filter(f -> !isnothing(f) && !isempty(f), fragments), " ")
    catch e
        if e isa KeyError
            @debug "Key error" e
            return "No luck, sorry."
        end
        rethrow()  # keeps the original backtrace, unlike `throw(e)`
    end
end
"""
    is_registered(userid)

Tell whether `userid` appears among the ids stored in `USERIDS`.
"""
function is_registered(userid)
    known_ids = values(USERIDS)
    return userid in known_ids
end
"""
    find_user(user::String)

Return the tuple `(user, id)` where `id` is the entry stored for `user` in `USERIDS`.
"""
function find_user(user::String)
    return (user, USERIDS[user])
end

"""
    find_user(::Nothing)

Pick one of the registered usernames at random and return its `(username, id)` tuple.
"""
function find_user(::Nothing)
    candidates = list_usernames()
    return find_user(candidates[rand(1:end)])
end
end

118
src/server.jl Normal file
View file

@ -0,0 +1,118 @@
module Server
using Markdown
using Mux
using ..Markov
using ..Bot
# Middlewares
"""
    extract_chatid(app, req)

Middleware: copy the chat id of the incoming message into `req[:params][:chatid]`,
then pass `req` on to `app`.
"""
function extract_chatid(app, req)
    @debug "Looking for chan ID" req
    chat = req[:message][:chat]
    req[:params][:chatid] = chat[:id]
    return app(req)
end
"""
    ignore_if_not_allowed(app, req)

Middleware: forward `req` to `app` only when its chat id equals `Bot.CHAT_ID`;
otherwise return `nothing`.
"""
function ignore_if_not_allowed(app, req)
    if req[:params][:chatid] != Bot.CHAT_ID
        @debug "Ignoring request"
        return nothing
    end
    return app(req)
end
"""
    extract_command(command, parameters...; botname="")

Build a middleware that recognises the bot command `command` (with or without its
leading slash, optionally suffixed with `@botname`) in an incoming message.

When the command is found, the middleware stores in `req[:command]`
* `:name` — the command name without its leading slash,
* `:parameters` — a `Dict` mapping each symbol of `parameters` to the corresponding
  whitespace-separated word following the command, or `nothing` when absent,

and then calls `app(req)`. Requests without entities, or without a matching command,
are passed to `app` untouched.
"""
function extract_command(command, parameters...; botname="")
    # `startswith` also copes with an empty name, unlike indexing the first character.
    if startswith(command, "/")
        command = command[2:end]
    end
    accepted_names = ("/" * command, "/" * command * "@" * botname)
    function middleware(app, req)
        message = req[:message]
        if !(:entities in keys(message))
            return app(req)
        end
        # Entity offsets and lengths are expressed in UTF-16 code units. Slicing the
        # Julia string directly would index bytes and break on non-ASCII text.
        units = transcode(UInt16, String(message[:text]))
        utf16_slice(from, to) = transcode(String, units[from:min(to, length(units))])
        @debug "Looking for commands"
        command_entities = [e for e in message[:entities] if e[:type] == "bot_command"]
        @debug "There are commands" command_entities
        @debug "Looking for commands names"
        commands = [(utf16_slice(e[:offset] + 1, e[:offset] + e[:length]), e) for e in command_entities]
        @debug "Commands found" commands
        first_command_id = findfirst(c -> c[1] in accepted_names, commands)
        if isnothing(first_command_id)
            return app(req)
        end
        entity = commands[first_command_id][2]
        # Everything after the command, split on whitespace, feeds the parameters.
        end_of_text = split(utf16_slice(entity[:offset] + entity[:length] + 1, length(units)))
        req[:command] = Dict()
        req[:command][:name] = command
        req[:command][:parameters] = Dict{Symbol, Union{Nothing, String}}(p => nothing for p in parameters)
        # `zip` stops at the shorter side: missing words leave their parameter at `nothing`.
        for (param, value) in zip(parameters, end_of_text)
            @debug "Parameter attributed" param value
            req[:command][:parameters][param] = value
        end
        return app(req)
    end
    return middleware
end
"""
    branch_to_callback(calback, command)

Build a `branch` that routes a request to `calback` when the request carries a parsed
command whose name equals `command`.
"""
function branch_to_callback(calback, command)
    function branching_f(req)
        # No command was parsed for this request: never take the branch.
        if !(:command in keys(req))
            @debug "branching" req
            return false
        end
        @debug "branching" req[:command][:name] command
        return req[:command][:name] == command
    end
    return branch(branching_f, calback)
end
"""
    command(command, callback, parameters...; botname="")

Combine the parser built by `extract_command` with the router built by
`branch_to_callback`, so that `callback` handles the bot command `command`.
"""
function command(command, callback, parameters...; botname="")
    # `extract_command` stores the name without its leading slash, so the same
    # normalised name is given to the branch predicate. Otherwise a command declared
    # as "/name" would be parsed but never routed.
    name = startswith(command, "/") ? command[2:end] : command
    return stack(
        extract_command(name, parameters...; botname=botname),
        branch_to_callback(callback, name),
    )
end
# Endpoints
"""
    show_help(req)

Endpoint of the `help` command: return the fixed help text. `req` is not used.
"""
function show_help(req)
    help_text = """
    Hi ! I'm the Markovian bot. Here is what I can do :
    * /help Prints this list of commands !
    * /talk [username [word]] Creates a Markov chain for the given `user` (chosen at random if not set) containing `word` (chosen at random if not set)
    * /list Lists registered usernames.
    🐺
    """
    return help_text
end
"""
    list_usernames(req)

Endpoint of the `list` command: return a header line followed by the registered
usernames, sorted, one bullet per line. `req` is not used.
"""
function list_usernames(req)
    bullets = [" * " * name for name in sort(Markov.list_usernames())]
    header = """
    Here are the users I know of :
    """
    return header * join(bullets, "\n")
end
"""
    talk(req)

Endpoint of the `talk` command: read the `:user` and `:word` parameters parsed into
`req[:command][:parameters]` and return the sentence built by `Markov.make_sentence`.
"""
function talk(req)
    parameters = req[:command][:parameters]
    user, word = parameters[:user], parameters[:word]
    @debug "Time to talk" user word
    return Markov.make_sentence(user, word)
end
"""
    authentication()

Middleware stack that extracts the chat id and then drops requests coming from any
chat other than the allowed one.
"""
function authentication()
    return stack(extract_chatid, ignore_if_not_allowed)
end

"""
    commands()

Middleware stack wiring the `help`, `list` and `talk` commands to their endpoints.

NOTE(review): no `botname` is passed here, so the `@botname`-suffixed form of a
command is only matched against an empty bot name — confirm whether that is intended.
"""
function commands()
    help = command("help", show_help)
    list = command("list", list_usernames)
    chat = command("talk", talk, :user, :word)
    return stack(help, list, chat)
end

"""
    application()

Complete application: authentication, then commands, with a final endpoint that
returns `nothing` for every request.
"""
function application()
    fallback = _ -> nothing
    return mux(stack(authentication(), commands()), fallback)
end
end