有没有办法将英文数字单词转换为整数?
我需要将one转换为1,two转换为2,等等。
有没有库、类或者其他什么东西可以做到这一点?
这段代码的大部分是在设置numwords字典,这一步只在第一次调用时执行。
def text2int(textnum, numwords={}):
    """Convert a spelled-out English number into an integer.

    Args:
        textnum: Whitespace-separated, lowercase number words such as
            "three hundred thirty seven". The word "and" is accepted and
            contributes nothing.
        numwords: Lookup table mapping each word to a (scale, increment)
            pair. The mutable default is deliberate: the table is built on
            the first call and reused as a cache by later calls.

    Returns:
        The integer value of the phrase; 0 for an empty string.

    Raises:
        ValueError: If a word is not a known number word.
    """
    if not numwords:
        units = [
            "zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen",
            "nineteen",
        ]
        # Index n holds the word for n * 10; 0 and 10 have no "tens" word.
        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty",
                "seventy", "eighty", "ninety"]
        scales = ["hundred", "thousand", "million", "billion", "trillion"]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens):
            if word:  # do not register the empty placeholders as a word
                numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales):
            # idx 0 ("hundred") -> 10**2; every later scale -> 10**(3*idx).
            numwords[word] = (10 ** (idx * 3 or 2), 0)

    current = result = 0
    for word in textnum.split():
        if word not in numwords:
            raise ValueError("Illegal word: " + word)

        scale, increment = numwords[word]
        if scale > 1:
            # A bare scale word ("hundred") means one of that scale.
            current = max(1, current)
        current = current * scale + increment
        if scale > 100:
            # thousand/million/... close a group: bank it and start afresh.
            result += current
            current = 0

    return result + current


print(text2int("seven billion one hundred million thirty one thousand three hundred thirty seven"))
# 7100031337
一些较早的post可能会有帮助,即使他们更多地处理文本到数字。 Python:按字母顺序将拼出的数字转换为数字?
来自http://github.com/ghewgill/text2num/tree/master的来自Greg Hewgill的精彩代码片段
我的第一篇文章:)
感谢代码片段…节省了我很多时间!
我需要处理一些额外的解析情况,比如序数词(“first”,“second”)、带连字符的词(“one-hundred”)以及带连字符的序数词(如“fifty-seventh”),所以我加了几行代码:
def text2int(textnum, numwords={}):
    """Convert a spelled-out English number (cardinal or ordinal) to an int.

    Hyphens are treated as word separators, so "fifty-seventh" is read as
    "fifty seventh".

    Args:
        textnum: Lowercase number words separated by whitespace or hyphens.
        numwords: Lookup table mapping each word to a (scale, increment)
            pair. The mutable default is deliberate: the table is built on
            the first call and reused as a cache by later calls.

    Returns:
        The integer value of the phrase; 0 for an empty string.

    Raises:
        ValueError: If a word is not a known cardinal or ordinal number word.
    """
    if not numwords:
        units = [
            "zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen",
            "nineteen",
        ]
        # Index n holds the word for n * 10; 0 and 10 have no "tens" word.
        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty",
                "seventy", "eighty", "ninety"]
        scales = ["hundred", "thousand", "million", "billion", "trillion"]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens):
            if word:  # do not register the empty placeholders as a word
                numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales):
            # idx 0 ("hundred") -> 10**2; every later scale -> 10**(3*idx).
            numwords[word] = (10 ** (idx * 3 or 2), 0)

    # Ordinals that are not simply "<cardinal>th" / "<cardinal minus y>ieth".
    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5,
                     'eighth': 8, 'ninth': 9, 'twelfth': 12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    current = result = 0
    for word in textnum.replace('-', ' ').split():
        if word in ordinal_words:
            scale, increment = 1, ordinal_words[word]
        else:
            # Strip a regular ordinal suffix to recover the cardinal word,
            # keeping the original word intact for the error message.
            cardinal = word
            for ending, replacement in ordinal_endings:
                if cardinal.endswith(ending):
                    cardinal = cardinal[:-len(ending)] + replacement
            if cardinal not in numwords:
                raise ValueError("Illegal word: " + word)
            scale, increment = numwords[cardinal]

        if scale > 1:
            # A bare scale word ("hundredth") means one of that scale.
            current = max(1, current)
        current = current * scale + increment
        if scale > 100:
            # thousand/million/... close a group: bank it and start afresh.
            result += current
            current = 0

    return result + current
以下是一个简单的例子:
>>> number = {'one':1,
...           'two':2,
...           'three':3,}
>>>
>>> number['two']
2
还是你在寻找能处理“一万二千七百二十”的东西 ?
如果有人感兴趣,我改写了一个版本,它会保留字符串中其余的部分(不过可能有bug,我没有做太多测试)。
def text2int(textnum, numwords={}):
    """Replace spelled-out English numbers inside a sentence with digits.

    Words that are not part of a number are copied through unchanged;
    each run of number words (cardinal or ordinal, optionally hyphenated)
    is replaced by its integer value. "and" counts as part of a number only
    when it sits between two number words ("one hundred and five").

    Args:
        textnum: The sentence to process. Number words must be lowercase.
        numwords: Lookup table mapping each word to a (scale, increment)
            pair. The mutable default is deliberate: the table is built on
            the first call and reused as a cache by later calls.

    Returns:
        The sentence with number phrases replaced, words joined by single
        spaces (no trailing space).
    """
    if not numwords:
        units = [
            "zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen",
            "nineteen",
        ]
        # Index n holds the word for n * 10; 0 and 10 have no "tens" word.
        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty",
                "seventy", "eighty", "ninety"]
        scales = ["hundred", "thousand", "million", "billion", "trillion"]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens):
            if word:  # do not register the empty placeholders as a word
                numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales):
            # idx 0 ("hundred") -> 10**2; every later scale -> 10**(3*idx).
            numwords[word] = (10 ** (idx * 3 or 2), 0)

    # Ordinals that are not simply "<cardinal>th" / "<cardinal minus y>ieth".
    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5,
                     'eighth': 8, 'ninth': 9, 'twelfth': 12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    def _lookup(token):
        """Return the (scale, increment) pair for a number word, else None."""
        if not token:
            return None
        if token in ordinal_words:
            return (1, ordinal_words[token])
        # Work on a copy so that ordinary words ending in "th" ("with",
        # "month") are never altered in the output.
        candidate = token
        for ending, replacement in ordinal_endings:
            if candidate.endswith(ending):
                candidate = candidate[:-len(ending)] + replacement
        return numwords.get(candidate)

    # Split hyphenated tokens only when every part is a number word, so
    # "fifty-five" is parsed while "well-known" is left untouched.
    tokens = []
    for raw in textnum.split():
        parts = raw.split('-')
        if len(parts) > 1 and all(_lookup(part) is not None for part in parts):
            tokens.extend(parts)
        else:
            tokens.append(raw)

    pieces = []
    current = result = 0
    onnumber = False
    for pos, word in enumerate(tokens):
        pair = _lookup(word)
        if word == "and":
            # Outside of "<number> and <number>" this is an ordinary word.
            following = tokens[pos + 1] if pos + 1 < len(tokens) else None
            joins_number = (
                onnumber
                and following is not None
                and following != "and"
                and _lookup(following) is not None
            )
            if not joins_number:
                pair = None

        if pair is None:
            if onnumber:
                # A number phrase just ended: flush its value first.
                pieces.append(str(result + current))
            pieces.append(word)
            result = current = 0
            onnumber = False
            continue

        scale, increment = pair
        if scale > 1:
            # A bare scale word ("hundred") means one of that scale.
            current = max(1, current)
        current = current * scale + increment
        if scale > 100:
            # thousand/million/... close a group: bank it and start afresh.
            result += current
            current = 0
        onnumber = True

    if onnumber:
        pieces.append(str(result + current))
    return " ".join(pieces)
例:
>>> text2int("I want fifty five hot dogs for two hundred dollars.")
I want 55 hot dogs for 200 dollars.
如果你有“200美元”,可能会有问题。 但是,这真的很粗糙。
如果您想parsing的数字有限,可以很容易地将其硬编码为字典。
对于稍微复杂的情况,您可能希望基于相对简单的数字语法自动生成该字典。 一些沿着这个线(当然,广义…)
# Sketch only: fills myDict with the spelled-out forms of 30..39 by appending
# each single-digit word to "thirty-". Both myDict and singleDigitsDict must
# already exist; presumably singleDigitsDict maps 0..9 to their words -- verify.
for i in range(10): myDict[30 + i] = "thirty-" + singleDigitsDict[i]
如果你需要更广泛的东西,那么看起来你需要自然语言处理工具。 这篇文章可能是一个很好的起点。
这是第一个答案中的代码的C#实现:
/// <summary>
/// Converts a spelled-out English number (for example
/// "three hundred thirty seven") into its numeric value.
/// </summary>
/// <param name="text">
/// Lowercase number words separated by spaces, hyphens or em dashes.
/// The word "and" is accepted and contributes nothing.
/// </param>
/// <returns>The value of the phrase as a double; 0 for an empty string.</returns>
/// <exception cref="KeyNotFoundException">
/// Thrown when a word is not a known number word.
/// </exception>
public static double ConvertTextToNumber(string text)
{
    string[] units = new string[]
    {
        "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
        "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
        "sixteen", "seventeen", "eighteen", "nineteen",
    };

    // Index n holds the word for n * 10; 0 and 10 have no "tens" word.
    string[] tens = new string[]
    {
        "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
        "eighty", "ninety"
    };

    string[] scales = new string[] { "hundred", "thousand", "million", "billion", "trillion" };

    Dictionary<string, ScaleIncrementPair> numWord = new Dictionary<string, ScaleIncrementPair>();
    numWord.Add("and", new ScaleIncrementPair(1, 0));

    for (int i = 0; i < units.Length; i++)
    {
        numWord.Add(units[i], new ScaleIncrementPair(1, i));
    }

    // Start at 2 so the empty placeholders are never registered as a word.
    for (int i = 2; i < tens.Length; i++)
    {
        numWord.Add(tens[i], new ScaleIncrementPair(1, i * 10));
    }

    for (int i = 0; i < scales.Length; i++)
    {
        // "hundred" is 10^2; every later scale is 10^(3 * i).
        double scale = (i == 0) ? 100 : Math.Pow(10, i * 3);
        numWord.Add(scales[i], new ScaleIncrementPair(scale, 0));
    }

    double current = 0;
    double result = 0;

    // Drop the empty tokens produced by doubled or leading/trailing separators.
    char[] separators = new char[] { ' ', '-', '—' };
    foreach (var word in text.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        ScaleIncrementPair scaleIncrement;
        if (!numWord.TryGetValue(word, out scaleIncrement))
        {
            throw new KeyNotFoundException("Illegal word: " + word);
        }

        current = current * scaleIncrement.scale + scaleIncrement.increment;

        if (scaleIncrement.scale > 100)
        {
            // thousand/million/... close a group: bank it and start afresh.
            result += current;
            current = 0;
        }
    }

    return result + current;
}

/// <summary>
/// Multiplier and addend that a single number word contributes to the
/// running total.
/// </summary>
public struct ScaleIncrementPair
{
    public double scale;
    public int increment;

    public ScaleIncrementPair(double s, int i)
    {
        scale = s;
        increment = i;
    }
}
为了确切的目的,我刚刚向PyPI发布了一个名为word2number的python模块。 https://github.com/akshaynagpal/w2n
安装它使用:
pip install word2number
确保你的pip已更新到最新版本。
用法:
from word2number import w2n

# The parenthesised form works as a print statement on Python 2 and as a
# function call on Python 3.
print(w2n.word_to_num("two million three thousand nine hundred and eighty four"))
# Output: 2003984
我做了修改,使text2int(scale)能够返回正确的转换结果,其中scale是单独出现的数量级单词。例如,text2int("hundred") => 100。
import re

# Word -> (scale, increment) lookup table; filled lazily by the first call
# to text2int() and reused afterwards.
numwords = {}


def text2int(textnum):
    """Convert a spelled-out English number (cardinal or ordinal) to an int.

    Words may be separated by any run of whitespace and/or hyphens. A bare
    scale word is read as one of that scale ("hundred" -> 100).

    Args:
        textnum: Lowercase number words, e.g. "one hundred twenty-third".

    Returns:
        The integer value of the phrase; 0 for an empty or blank string.

    Raises:
        ValueError: If a word is not a known cardinal or ordinal number word.
    """
    if not numwords:
        units = [
            "zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen",
            "nineteen",
        ]
        # Index n holds the word for n * 10; 0 and 10 have no "tens" word.
        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty",
                "seventy", "eighty", "ninety"]
        scales = [
            "hundred", "thousand", "million", "billion", "trillion",
            "quadrillion", "quintillion", "sextillion", "septillion",
            "octillion", "nonillion", "decillion",
        ]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens):
            if word:  # do not register the empty placeholders as a word
                numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales):
            # idx 0 ("hundred") -> 10**2; every later scale -> 10**(3*idx).
            numwords[word] = (10 ** (idx * 3 or 2), 0)
        # Earlier revisions spelled this key "sexillion"; keep it as an alias.
        numwords["sexillion"] = numwords["sextillion"]

    # Ordinals that are not simply "<cardinal>th" / "<cardinal minus y>ieth".
    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5,
                     'eighth': 8, 'ninth': 9, 'twelfth': 12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    current = result = 0
    # re.split() yields '' at the edges when the text starts or ends with a
    # separator; those empty tokens are not words and must be dropped.
    tokens = [token for token in re.split(r"[\s-]+", textnum) if token]
    for word in tokens:
        if word in ordinal_words:
            scale, increment = 1, ordinal_words[word]
        else:
            # Strip a regular ordinal suffix to recover the cardinal word,
            # keeping the original word intact for the error message.
            cardinal = word
            for ending, replacement in ordinal_endings:
                if cardinal.endswith(ending):
                    cardinal = cardinal[:-len(ending)] + replacement
            if cardinal not in numwords:
                raise ValueError("Illegal word: " + word)
            scale, increment = numwords[cardinal]

        if scale > 1:
            # A bare scale word ("hundred") means one of that scale.
            current = max(1, current)
        current = current * scale + increment
        if scale > 100:
            # thousand/million/... close a group: bank it and start afresh.
            result += current
            current = 0

    return result + current
这是e_h的C#实现(见上文)的一个粗糙的Java移植版。请注意,这两个实现返回的都是double,而不是int。
/**
 * Converts spelled-out English numbers (for example
 * "three hundred thirty seven") into their numeric value.
 */
public class Text2Double {

    /** Number word -> scale/increment pair; built once and shared by all calls. */
    private static final Map<String, ScaleIncrementPair> NUM_WORDS = buildNumWords();

    /** Builds the lookup table of every supported number word. */
    private static Map<String, ScaleIncrementPair> buildNumWords() {
        String[] units = new String[]{
                "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
                "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
                "sixteen", "seventeen", "eighteen", "nineteen",
        };
        // Index n holds the word for n * 10; 0 and 10 have no "tens" word.
        String[] tens = new String[]{"", "", "twenty", "thirty", "forty", "fifty",
                "sixty", "seventy", "eighty", "ninety"};
        String[] scales = new String[]{"hundred", "thousand", "million", "billion", "trillion"};

        Map<String, ScaleIncrementPair> numWord = new LinkedHashMap<>();
        numWord.put("and", new ScaleIncrementPair(1, 0));
        for (int i = 0; i < units.length; i++) {
            numWord.put(units[i], new ScaleIncrementPair(1, i));
        }
        // Start at 2 so the empty placeholders are never registered as a word.
        for (int i = 2; i < tens.length; i++) {
            numWord.put(tens[i], new ScaleIncrementPair(1, i * 10));
        }
        for (int i = 0; i < scales.length; i++) {
            // "hundred" is 10^2; every later scale is 10^(3 * i).
            double scale = (i == 0) ? 100 : Math.pow(10, (i * 3));
            numWord.put(scales[i], new ScaleIncrementPair(scale, 0));
        }
        return numWord;
    }

    /**
     * Converts a phrase of number words into its value.
     *
     * @param text lowercase number words separated by spaces or hyphens;
     *             the word "and" is accepted and contributes nothing
     * @return the value of the phrase; 0 for an empty string
     * @throws IllegalArgumentException if a word is not a known number word
     */
    public double Text2Double(String text) {
        double current = 0;
        double result = 0;

        for (String word : text.split("[ -]+")) {
            if (word.isEmpty()) {
                // split() keeps a leading empty token when text starts with a separator.
                continue;
            }
            ScaleIncrementPair scaleIncrement = NUM_WORDS.get(word);
            if (scaleIncrement == null) {
                throw new IllegalArgumentException("Illegal word: " + word);
            }
            current = current * scaleIncrement.scale + scaleIncrement.increment;
            if (scaleIncrement.scale > 100) {
                // thousand/million/... close a group: bank it and start afresh.
                result += current;
                current = 0;
            }
        }
        return result + current;
    }
}

/** Multiplier and addend that a single number word contributes to the running total. */
public class ScaleIncrementPair {
    public double scale;
    public int increment;

    public ScaleIncrementPair(double s, int i) {
        scale = s;
        increment = i;
    }
}
一个快速的解决scheme是使用inflect.py来生成翻译字典。
inflect.py有一个number_to_words()
函数,它将把一个数字(例如2
)转换成它的单词forms(例如'two'
)。 不幸的是,它并没有提供反向的转换(如果有的话,你就不必走翻译字典这条路了)。 尽管如此,你仍然可以用该函数来建立翻译字典:
>>> import inflect
>>> p = inflect.engine()
>>> word_to_number_mapping = {}
>>>
>>> for i in range(1, 100):
...     word_form = p.number_to_words(i)  # 1 -> 'one'
...     word_to_number_mapping[word_form] = i
...
>>> print word_to_number_mapping['one']
1
>>> print word_to_number_mapping['eleven']
11
>>> print word_to_number_mapping['forty-three']
43
如果你愿意花点时间,可以检查一下inflect.py的number_to_words()
函数的内部实现,并编写你自己的代码来动态地实现这一点(我还没有试过这样做)。
有一个由马克·伯恩斯(Marc Burns)编写的ruby gem可以做到这一点。 我最近fork了它,以增加对年份的支持。 你可以从python调用ruby代码。
require 'numbers_in_words'
require 'numbers_in_words/duck_punch'

# Sample phrases, including a year ("nineteen ninety six") and runs of
# separate numbers ("fifteen sixteen").
nums = ["fifteen sixteen", "eighty five sixteen", "nineteen ninety six",
        "one hundred and seventy nine", "thirteen hundred",
        "nine thousand two hundred and ninety seven"]

# Print each phrase followed by the number it converts to.
nums.each { |n| p n; p n.in_numbers }
结果:
"fifteen sixteen" 1516 "eighty five sixteen" 8516 "nineteen ninety six" 1996 "one hundred and seventy nine" 179 "thirteen hundred" 1300 "nine thousand two hundred and ninety seven" 9297
# This code works only for numbers below 100, in both directions: word to int
# and int to word. (Supporting larger numbers needs another 10-20 lines of
# code and some simple logic. This is just simple code for beginners.)

# Index n holds the word for n.
_UNIT_NAMES = [
    'Zero', 'One', 'Two', 'Three', 'Four', 'Five', 'Six', 'Seven', 'Eight',
    'Nine', 'Ten', 'Eleven', 'Twelve', 'Thirteen', 'Fourteen', 'Fifteen',
    'Sixteen', 'Seventeen', 'Eighteen', 'Nineteen',
]
# Index n holds the word for n * 10; 0 and 10 have no "tens" word.
_TENS_NAMES = ['', '', 'Twenty', 'Thirty', 'Forty', 'Fifty', 'Sixty',
               'Seventy', 'Eighty', 'Ninety']

_UNIT_VALUES = {name.lower(): value for value, name in enumerate(_UNIT_NAMES)}
_TENS_VALUES = {
    name.lower(): index * 10 for index, name in enumerate(_TENS_NAMES) if name
}
# Common misspellings that earlier versions of this script expected as input.
_TENS_VALUES.update({'fourty': 40, 'ninty': 90})


def number_to_words(number):
    """Return the English words for an integer in 0..99, or None if outside.

    Compound numbers are written without a separator, e.g. 21 -> "TwentyOne".
    """
    if not 0 <= number <= 99:
        return None
    if number < 20:
        return _UNIT_NAMES[number]
    tens, unit = divmod(number, 10)
    # Exact multiples of ten have no unit word ("Twenty", not "TwentyZero").
    return _TENS_NAMES[tens] + (_UNIT_NAMES[unit] if unit else '')


def words_to_number(text):
    """Return the integer (0..99) spelled by *text*, or None if unrecognised.

    Matching ignores case, spaces and hyphens, so "twentyone", "Twenty One"
    and "twenty-one" all give 21.
    """
    key = text.lower().replace(' ', '').replace('-', '')
    if key in _UNIT_VALUES:
        return _UNIT_VALUES[key]
    for tens_word, tens_value in _TENS_VALUES.items():
        if not key.startswith(tens_word):
            continue
        rest = key[len(tens_word):]
        if not rest:
            return tens_value
        unit = _UNIT_VALUES.get(rest)
        # Only 1..9 may follow a tens word ("twentyten" is not a number).
        if unit is not None and 1 <= unit <= 9:
            return tens_value + unit
    return None


def main():
    """Prompt for a number or number word and print its conversion."""
    num = input("Enter the number you want to convert : ")
    if num.isdecimal():
        # isdecimal() accepts exactly the digit characters int() can parse.
        words = number_to_words(int(num))
        if words is None:
            print("----->Invalid Input<-----")
        else:
            print(" :---> " + words)
    elif not num:
        print("----->Please Enter Input<-----")
    else:
        value = words_to_number(num)
        if value is None:
            print("----->Invalid Input<-----")
        else:
            print(" :---> " + str(value))


if __name__ == "__main__":
    main()