Dominik Macháček
committed on
Commit
·
e62fba3
1
Parent(s):
a365074
line packet commited
Browse files
originally from ELITR -- TODO -- change comments
- line_packet.py +94 -0
line_packet.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3

r"""Helpers for exchanging individual lines of text over a socket.

Used by marian-server-server.py to communicate with the Marian worker.

Wire format: a line travels as one or more packets of exactly PACKET_SIZE
bytes. The sender encodes the line as UTF-8 (the text itself contains no
\n or \0), appends a \n terminator and a \0 byte, cuts the result into
PACKET_SIZE-sized pieces and fills the last piece with \0 bytes up to
PACKET_SIZE.
"""

# Size in bytes of every packet put on the wire (64 KiB).
PACKET_SIZE = 64 * 1024
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def send_one_line(socket, text):
    r"""Send the first line of ``text`` over ``socket`` in padded packets.

    Line boundaries are those recognised by ``str.splitlines()`` [1]; a
    '\0' character is treated as a line terminator as well. If ``text``
    holds several lines, only the first one is transmitted; an empty
    ``text`` is transmitted as an empty line.

    The line is encoded as UTF-8, followed by b'\n\0', and written in
    chunks of exactly PACKET_SIZE bytes; the final chunk is padded with
    b'\0'.

    [1] https://docs.python.org/3/library/stdtypes.html#str.splitlines

    Args:
        socket: a socket object (only ``sendall`` is used).
        text: string containing a line of text for transmission.

    Raises:
        Whatever ``socket.sendall`` raises when the send fails.
    """
    # str.replace() returns a new string, so the result must be re-bound;
    # otherwise an embedded NUL would reach the wire and be mistaken for
    # the end-of-line padding by the receiver.
    text = text.replace('\0', '\n')
    lines = text.splitlines()
    first_line = lines[0] if lines else ''
    # TODO Is there a better way of handling bad input than 'replace'?
    data = first_line.encode('utf-8', errors='replace') + b'\n\0'
    for offset in range(0, len(data), PACKET_SIZE):
        packet = data[offset:offset + PACKET_SIZE]
        # ljust() pads only the (short) final chunk; full chunks are
        # returned unchanged.
        socket.sendall(packet.ljust(PACKET_SIZE, b'\0'))
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def receive_one_line(socket):
    r"""Receive a single line of text from ``socket``.

    Keeps calling ``socket.recv(PACKET_SIZE)`` until a received chunk
    contains a '\0' byte, blocking (on a blocking socket) while no data is
    available. Everything received is decoded as UTF-8 (undecodable bytes
    are replaced), surrounding '\0' padding is stripped and only the text
    before the first '\n' is kept.

    NOTE(review): recv() may hand back less than a full packet, in which
    case the remaining padding stays unread in the stream -- confirm that
    callers tolerate this.

    Args:
        socket: a socket object (only ``recv`` is used).

    Returns:
        The first received line with a terminating newline appended, or
        None if the connection was closed before a '\0' byte arrived.
    """
    chunks = []
    terminated = False
    while not terminated:
        chunk = socket.recv(PACKET_SIZE)
        if not chunk:
            # An empty read means the peer closed the connection.
            return None
        chunks.append(chunk)
        terminated = b'\0' in chunk
    # TODO Is there a better way of handling bad input than 'replace'?
    decoded = b''.join(chunks).decode('utf-8', errors='replace')
    first_line, _, _ = decoded.strip('\0').partition('\n')
    return first_line + '\n'
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def receive_lines(socket):
    r"""Receive whatever lines a single ``recv`` call currently yields.

    Performs one ``socket.recv(PACKET_SIZE)``. The received bytes are
    decoded as UTF-8 (undecodable bytes are replaced), leading and
    trailing '\0' padding is stripped and the text is split on '\n'.

    NOTE(review): data is decoded per recv() call, so a multi-byte
    character split across two reads is decoded with replacement
    characters -- confirm this is acceptable for callers.

    Args:
        socket: a socket object (only ``recv`` is used); may be
            non-blocking.

    Returns:
        None if the connection has been closed; an empty list if no data
        is available on a non-blocking socket or if only padding bytes
        were received; otherwise the list of strings obtained by
        splitting the received text on '\n' (a trailing newline therefore
        yields a final empty string).
    """
    try:
        data = socket.recv(PACKET_SIZE)
    except BlockingIOError:
        # Non-blocking socket with nothing to read right now.
        return []
    # recv() reports a closed connection with an empty bytes object, never
    # with None, so test for emptiness rather than identity.
    if not data:
        return None
    # TODO Is there a better way of handling bad input than 'replace'?
    text = data.decode('utf-8', errors='replace').strip('\0')
    if not text:
        # Only padding arrived (e.g. the tail of a previous packet): the
        # connection is still open, there is just no line to report.
        return []
    return text.split('\n')
|